{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Attention Weighted word averaging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "import random\n",
    "import numpy as np\n",
    "from collections import Counter\n",
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import torch.nn.functional as F\n",
    "\n",
    "random.seed(2019)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "VOCAB_SIZE = 14_828\n",
    "\n",
    "EPOCHS = 5\n",
    "BATCH_SIZE = 32\n",
    "LEARNING_RATE = 0.01\n",
    "BEST_VALID_LOSS = float('inf')\n",
    "\n",
    "EMBEDDING_DIM = 100\n",
    "OUTPUT_DIM = 1\n",
    "\n",
    "train_file = \"data/senti.train.tsv\"\n",
    "eval_file = \"data/senti.dev.tsv\"\n",
    "test_file = \"data/senti.test.tsv\"\n",
    "\n",
    "USE_CUDA = torch.cuda.is_available()\n",
    "DEVICE = torch.device('cuda:1' if USE_CUDA else 'cpu')\n",
    "NUM_CUDA = torch.cuda.device_count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_text_file(filename):\n",
    "    \"\"\"将样本的特征与标签分开，并将样本特征分词\"\"\"\n",
    "    sentences = []\n",
    "    label = []\n",
    "    with open(filename, \"r\") as f:\n",
    "        sent_list  = [line.strip().split('\\t') for line in f]\n",
    "    for sample in sent_list:\n",
    "        sentences.append(sample[0].lower().split(\" \"))\n",
    "        label.append(int(sample[-1]))\n",
    "    return sentences, label\n",
    "\n",
    "\n",
    "def build_word_dic(sentences_list, vocab_size=20_000):\n",
    "    \"\"\"构建words_set, word2idx, idx2word\"\"\"\n",
    "    words_list = [w for line in sentences_list for w in line]\n",
    "    counter = Counter(words_list)\n",
    "    words_topn = counter.most_common(vocab_size)\n",
    "    words_set = [item[0] for item in words_topn]\n",
    "    words_set = ['<pad>', \"<unk>\"] + words_set\n",
    "    word2idx = {w:i for i, w in enumerate(words_set)}\n",
    "    idx2word = {i:w for i, w in enumerate(words_set)}\n",
    "    return words_topn, word2idx, idx2word\n",
    "\n",
    "\n",
    "def build_x_y(word2idx, sentences_list, label_list, sent_len=30):\n",
    "    \"\"\"构建输入模型的数据，对每个单词编码，每个句子通过添加pading保持一样长\"\"\"\n",
    "    x = []\n",
    "    y = []\n",
    "    for sent, label in zip(sentences_list, label_list):\n",
    "        word_x = [0]*sent_len\n",
    "        if len(sent) > sent_len:\n",
    "            sent = sent[:sent_len]\n",
    "        for i, w in enumerate(sent):\n",
    "            if w in word2idx:\n",
    "                word_x[i] = word2idx[w]\n",
    "            else:\n",
    "                word_x[i] = word2idx['<unk>']\n",
    "        x.append(word_x)\n",
    "        y.append(label)\n",
    "    return x, y\n",
    "\n",
    "\n",
    "def build_batch_data(data, label, batch_size=32):\n",
    "    \"\"\"构建tensor格式的批次数据，返回batch列表，每个batch为二元组包含feature和label\"\"\"\n",
    "    batch_data = []\n",
    "    # 打乱顺序\n",
    "    data_labels = [[x, y] for x, y in zip(data, label)]\n",
    "    random.shuffle(data_labels)\n",
    "    xlist = [item[0] for item in data_labels]\n",
    "    ylist = [item[1] for item in data_labels]\n",
    "    \n",
    "    x_tensor = torch.tensor(xlist, dtype=torch.long)\n",
    "    y_tensor = torch.tensor(ylist, dtype=torch.float)\n",
    "    n, dim = x_tensor.size()\n",
    "    for start in range(0, n, batch_size):\n",
    "        end = start + batch_size\n",
    "        if end > n:\n",
    "            break\n",
    "            xbatch = x_tensor[start: ]\n",
    "            ybatch = y_tensor[start: ]\n",
    "            print(\"最后一个batch size:\", dbatch.size())\n",
    "        else:\n",
    "            xbatch = x_tensor[start: end]\n",
    "            ybatch = y_tensor[start: end]\n",
    "        batch_data.append((xbatch, ybatch))\n",
    "    return batch_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_sentences, train_label = load_text_file(train_file)\n",
    "eval_sentences, eval_label = load_text_file(eval_file)\n",
    "test_sentences, test_label = load_text_file(test_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "处理后的样本与标签： ['hide', 'new', 'secretions', 'from', 'the', 'parental', 'units'] 0\n",
      "各个数据集样本数量：\n",
      "67349 67349\n",
      "872 872\n",
      "1821 1821\n",
      "各数据集最长最短句子单词数：\n",
      "52 1\n",
      "47 2\n",
      "56 2\n"
     ]
    }
   ],
   "source": [
    "print(\"处理后的样本与标签：\", train_sentences[0], train_label[0])\n",
    "print(\"各个数据集样本数量：\")\n",
    "print(len(train_sentences), len(train_label))\n",
    "print(len(eval_sentences), len(eval_label))\n",
    "print(len(test_sentences), len(test_label))\n",
    "\n",
    "print(\"各数据集最长最短句子单词数：\")\n",
    "print(max([len(s) for s in train_sentences]), min([len(s) for s in train_sentences]))\n",
    "print(max([len(s) for s in eval_sentences]), min([len(s) for s in eval_sentences]))\n",
    "print(max([len(s) for s in test_sentences]), min([len(s) for s in test_sentences]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_seq_len = 56\n",
    "words_set, word2idx, idx2word = build_word_dic(train_sentences, vocab_size=VOCAB_SIZE)\n",
    "train_x, train_y = build_x_y(word2idx, train_sentences, train_label,sent_len=max_seq_len)\n",
    "eval_x, eval_y = build_x_y(word2idx, eval_sentences, eval_label,sent_len=max_seq_len)\n",
    "test_x, test_y = build_x_y(word2idx, test_sentences, test_label,sent_len=max_seq_len)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "词典长度: 14828 14830 14830\n",
      "训练集样本数量: 67349 67349\n"
     ]
    }
   ],
   "source": [
    "print(\"词典长度:\", len(words_set), len(word2idx), len(idx2word))\n",
    "print(\"训练集样本数量:\", len(train_x), len(train_y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data = build_batch_data(train_x, train_y, batch_size=BATCH_SIZE)\n",
    "eval_data = build_batch_data(eval_x, eval_y, batch_size=BATCH_SIZE)\n",
    "test_data = build_batch_data(test_x, test_y, batch_size=BATCH_SIZE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "class AttAvgModel(nn.Module):\n",
    "    def __init__(self, vocab_size, embed_dim, output_size, pad_idx):\n",
    "        super(AttAvgModel, self).__init__()\n",
    "        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)\n",
    "        initrange = 0.1\n",
    "        self.embedding.weight.data.uniform_(-initrange, initrange)\n",
    "        # 计算 Attention 向量\n",
    "        self.u = nn.Parameter(torch.randn(embed_dim))\n",
    "        self.fc = nn.Linear(embed_dim, output_size, bias=False)\n",
    "        \n",
    "    def forward(self, text):\n",
    "        # [batch, seq_len] -> [batch, seq_len, emb_dim]\n",
    "        embed = self.embedding(text)\n",
    "        \n",
    "        # 扩展u这组参数，为的是计算和词向量的相似度，最后得到权重\n",
    "        # [emb_dim] -> [batch, seq_len, emb_dim]\n",
    "        u = self.u.repeat(embed.size(0), embed.size(1), 1)  # 在最后一个参数上重复自己\n",
    "        \n",
    "        # 计算余弦相似度\n",
    "        cos = F.cosine_similarity(embed, u, dim=2)   # [batch, seq_len] 计算每个词向量和对应的u向量的余弦相似度\n",
    "        \n",
    "        # 计算权重 \n",
    "        alpha = F.softmax(cos, dim=1)   # [bacth, seq_len]  softmax的作用是使得每个序列的个单词权重之和为1\n",
    "        alpha = alpha.unsqueeze(2)      # [bacth, seq_len, 1]\n",
    "        \n",
    "        # embed*alpha => [bacth, seq_len, emb_dim] 相当于每个词向量（的每个元素）都乘上一个权重\n",
    "        h_attn = torch.sum(embed*alpha, dim=1).squeeze(1)  # 在1维度上sum 相当于把序列（句子）求和[batch, emb_dim]\n",
    "        \n",
    "        # [batch, emb_dim] -> [batch, output_size]即[batch, 1]\n",
    "        out = self.fc(h_attn) \n",
    "      \n",
    "        return out\n",
    "    \n",
    "    def get_embed_weight(self):\n",
    "        \"\"\"获取embedding层参数\"\"\"\n",
    "        return self.embedding.weight.data\n",
    "    \n",
    "    def get_u(self):\n",
    "        \"\"\"attention向量\"\"\"\n",
    "        return self.u\n",
    "\n",
    "\n",
    "def binary_accuracy(preds, y):\n",
    "    \"\"\"计算准确率\"\"\"\n",
    "    rounded_preds = torch.round(torch.sigmoid(preds))\n",
    "    correct = (rounded_preds == y).float()  \n",
    "    acc = correct.sum()/len(correct)\n",
    "    return acc\n",
    "\n",
    "\n",
    "def train(model, device, iterator, optimizer, criterion):\n",
    "    \"\"\"训练函数\"\"\"\n",
    "    \n",
    "    epoch_loss = 0\n",
    "    epoch_acc = 0\n",
    "    model.train()\n",
    "    \n",
    "    for x, y in iterator:\n",
    "        x, y = x.to(device), y.to(device) # torch.int64\n",
    "        optimizer.zero_grad()\n",
    "        predictions = model(x).squeeze(1)  # torch.float32 \n",
    "        \n",
    "        loss = criterion(predictions, y)\n",
    "        acc = binary_accuracy(predictions, y)\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        \n",
    "        epoch_loss += loss.item()\n",
    "        epoch_acc += acc.item()\n",
    "        \n",
    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)\n",
    "\n",
    "\n",
    "def evaluate(model, device, iterator, criterion):\n",
    "    \"\"\"验证函数\"\"\"\n",
    "    epoch_loss = 0\n",
    "    epoch_acc = 0\n",
    "    model.eval()\n",
    "    \n",
    "    with torch.no_grad():\n",
    "        for x, y in iterator:\n",
    "            x, y = x.to(device), y.to(device)\n",
    "            predictions = model(x).squeeze(1)\n",
    "            loss = criterion(predictions, y)\n",
    "            acc = binary_accuracy(predictions, y)\n",
    "            epoch_loss += loss.item()\n",
    "            epoch_acc += acc.item()\n",
    "        \n",
    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)\n",
    "\n",
    "\n",
    "def count_parameters(model):\n",
    "    \"\"\"统计模型的参数量\"\"\"\n",
    "    return sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
    "\n",
    "\n",
    "def epoch_time(start_time, end_time):\n",
    "    \"\"\"计算时间差，返回分钟, 秒钟\"\"\"\n",
    "    elapsed_time = end_time - start_time\n",
    "    elapsed_mins = int(elapsed_time / 60)\n",
    "    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))\n",
    "    return elapsed_mins, elapsed_secs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INPUT_DIM 14830\n",
      "模型有1,483,200个可调节参数, 大约5.657958984375 M.\n",
      "device: cuda:1\n"
     ]
    }
   ],
   "source": [
    "INPUT_DIM = len(words_set) + 2\n",
    "print(\"INPUT_DIM\", INPUT_DIM)\n",
    "PAD_IDX = word2idx['<pad>']\n",
    "\n",
    "model = AttAvgModel(INPUT_DIM, EMBEDDING_DIM, OUTPUT_DIM, PAD_IDX)\n",
    "print(f'模型有{count_parameters(model):,}个可调节参数, 大约{count_parameters(model)*4/1024/1024} M.')\n",
    "\n",
    "model = model.to(DEVICE)\n",
    "print(\"device:\", DEVICE)\n",
    "\n",
    "    \n",
    "optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)\n",
    "criterion = nn.BCEWithLogitsLoss()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/anaconda3/lib/python3.6/site-packages/torch/serialization.py:251: UserWarning: Couldn't retrieve source code for container of type AttAvgModel. It won't be checked for correctness upon loading.\n",
      "  \"type \" + obj.__name__ + \". It won't be checked \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "***Save Best Model attention-wavg-model.pth***\n",
      "Epoch: 01 | Epoch Time: 0m 9s\n",
      "\tTrain Loss: 0.327 | Train Acc: 85.78%\n",
      "\t Val. Loss: 0.493 |  Val. Acc: 81.25%\n",
      "Epoch: 02 | Epoch Time: 0m 10s\n",
      "\tTrain Loss: 0.194 | Train Acc: 92.50%\n",
      "\t Val. Loss: 0.588 |  Val. Acc: 79.86%\n",
      "Epoch: 03 | Epoch Time: 0m 10s\n",
      "\tTrain Loss: 0.163 | Train Acc: 93.81%\n",
      "\t Val. Loss: 0.685 |  Val. Acc: 80.09%\n",
      "Epoch: 04 | Epoch Time: 0m 9s\n",
      "\tTrain Loss: 0.138 | Train Acc: 94.90%\n",
      "\t Val. Loss: 0.813 |  Val. Acc: 79.86%\n",
      "Epoch: 05 | Epoch Time: 0m 9s\n",
      "\tTrain Loss: 0.117 | Train Acc: 95.70%\n",
      "\t Val. Loss: 0.955 |  Val. Acc: 80.44%\n"
     ]
    }
   ],
   "source": [
    "model_name = 'attention-wavg-model.pth'\n",
    "for epoch in range(1, EPOCHS+1):\n",
    "    start_time = time.time()\n",
    "    train_loss, train_acc = train(model, DEVICE, train_data, optimizer, criterion)\n",
    "    valid_loss, valid_acc = evaluate(model, DEVICE, eval_data, criterion)\n",
    "    end_time = time.time()\n",
    "\n",
    "    epoch_mins, epoch_secs = epoch_time(start_time, end_time)\n",
    "    if valid_loss < BEST_VALID_LOSS:\n",
    "        BEST_VALID_LOSS = valid_loss\n",
    "        torch.save(model, model_name)\n",
    "        print(f'***Save Best Model {model_name}***')\n",
    "    \n",
    "    print(f'Epoch: {epoch :02} | Epoch Time: {epoch_mins}m {epoch_secs}s')\n",
    "    print(f'\\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')\n",
    "    print(f'\\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test Loss: 0.4549624267965555 | Test Acc: 0.8141741071428571 |\n"
     ]
    }
   ],
   "source": [
    "model = torch.load(model_name)\n",
    "test_loss, test_acc = evaluate(model, DEVICE, test_data, criterion)\n",
    "print('Test Loss: {0} | Test Acc: {1} |'.format(test_loss, test_acc))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 分析词向量和Attention向量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = torch.load(model_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "word_embedding = model.get_embed_weight()  # 注意多GPU的时候是这样\n",
    "u = model.get_u()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([14830, 100]) torch.Size([100])\n"
     ]
    }
   ],
   "source": [
    "print(word_embedding.size(), u.size())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([14830, 100])\n"
     ]
    }
   ],
   "source": [
    "u_repeat = u.repeat(word_embedding.size()[0], 1)\n",
    "print(u_repeat.size())\n",
    "cos_sim = torch.cosine_similarity(word_embedding, u_repeat, dim=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "cos_score, cos_idx = cos_sim.sort()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cosine similarity最高的15个单词：\n",
      "nose : 0.8122435808181763\n",
      "soccer : 0.819983184337616\n",
      "telanovela : 0.8228465914726257\n",
      "tank : 0.8235946297645569\n",
      "hopelessly : 0.823834240436554\n",
      "connected : 0.828292965888977\n",
      "rises : 0.8358004689216614\n",
      "n't : 0.8396202921867371\n",
      "down : 0.8575065732002258\n",
      "induces : 0.8670101165771484\n",
      "seems : 0.8749212026596069\n",
      "not : 0.8817094564437866\n",
      "wrong : 0.885643720626831\n",
      "minutes : 0.9224189519882202\n",
      "or : 0.9354701042175293\n"
     ]
    }
   ],
   "source": [
    "print(\"Cosine similarity最高的15个单词：\")\n",
    "for i, s in zip(cos_idx[-15:], cos_score[-15: ]):\n",
    "    print(f\"{idx2word[i.item()]} : {s.item()}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cosine similarity最低的15个单词：\n",
      "caine : -0.9839131236076355\n",
      "boys : -0.9837092161178589\n",
      "stardom : -0.9834532141685486\n",
      "player : -0.9832771420478821\n",
      "roots : -0.9796615839004517\n",
      "sometimes : -0.9795486927032471\n",
      "contrivances : -0.9772619605064392\n",
      "describe : -0.9753174185752869\n",
      "words : -0.9740455746650696\n",
      "purpose : -0.9728651642799377\n",
      "italian : -0.9713523387908936\n",
      "actually : -0.9706254601478577\n",
      "quite : -0.9703256487846375\n",
      "delivery : -0.9696605205535889\n",
      "enervating : -0.9686934351921082\n"
     ]
    }
   ],
   "source": [
    "print(\"Cosine similarity最低的15个单词：\")\n",
    "for i, s in zip(cos_idx[: 15], cos_score[: 15]):\n",
    "    print(f\"{idx2word[i.item()]} : {s.item()}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 相同单词在不同语境下attention的变化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "661\n"
     ]
    }
   ],
   "source": [
    "words_freq = []\n",
    "for w in words_set:\n",
    "    if w[-1] >100:\n",
    "        words_freq.append(w[0])\n",
    "print(len(words_freq))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"the , a and of . to 's is that in it as with an film its for movie this you but be on n't by more -- one at than has not about his from are like so or all have most story ' good ... into out too who -rrb- up characters i funny -lrb- comedy if just no does much what can even ` your their will time some bad `` little '' very way which best any love been life make work enough there only he makes us new movies never something do they through was well action great would own made director humor many we really performances plot drama her how could films sense see such better other fun audience people every off two without cast nothing feel both when being look character may should entertaining acting real ever often performance them long : while still world because script also interesting another heart kind 're those hollywood dialogue watch minutes first screen down few get big over far thriller might less hard human moments actors tale compelling romantic rather cinema had year family almost material end watching seen - worth 've seem itself picture original take before my seems were documentary emotional our quite after find old these visual comes man things back fascinating moving sweet right works between feels here scenes full come piece direction care yet ; music go dull me going takes years special ultimately young ca keep making anything laughs 'll times why american worst smart give experience comic enjoyable least cinematic lot part where beautiful entertainment history style sometimes though thing art clever kids away gives again him together bit she intelligence dark idea gets amusing engaging same powerful once women genre intelligent star energy subject did charming surprisingly actually summer anyone charm want screenplay point filmmaking short place narrative solid pretty flick around feeling nearly feature silly simply whose manages strong face predictable wit think enjoy war truly offers show say deeply goes perfect know satisfying then power 
fans whole theater need effort always becomes done spirit fresh beautifully true trying premise half quirky three since filmmakers suspense tone dramatic portrait hilarious horror under last interest fine flat effects rare high rich series hours probably children everyone romance ideas touching ? familiar looking remarkable modern study 'd especially imagination wonderful pleasure classic boring small easy everything set exercise leave title level instead stuff honest culture past dumb intriguing tv wo video filmmaker light turn already actor audiences storytelling sad lack matter recent stories obvious mind written put despite talent ending terrific images french memorable project visually serious woman completely adventure become opera beauty talented gentle camera likely looks mess emotionally fails ride day slow sure cold having reason himself head cliches gorgeous directed beyond inside mr. jokes left men bland proves melodrama shot ways low impossible easily run above stupid thoughtful hour contrived excellent must simple ugly eyes different debut complex tired else fairly lacks viewer otherwise de believe shows brilliant viewers comedies each sort warmth passion black certainly writing turns particularly attempt play welcome wrong violence lost formula social cheap themselves genuine soap personal role delightful thoroughly crime either book sequences animation plays line hero version sex historical impressive barely home seeing appealing nor fact gags along quality clichés worse change got adults old-fashioned found lives middle surprising engrossing death running girl ambitious next message important creative fantasy able now live pretentious worthy ! 
sequel 'm decent psychological warm ends tragedy nice imagine entirely none perfectly michael waste creepy act remains sit deep concept unfunny laugh rock pictures job unsettling journey inventive usual try insight winning painful cool vision john unique attention convincing bring neither moral mystery stylish satire nature thin leaves master knows success side against believable artist lacking awful elements tedious lead reality seat working shallow mood situations view epic considerable appeal period provocative falls moment create days sentimental political scene sensitive watchable endearing cinematography road hackneyed\""
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\" \".join(words_freq)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "def word2sentences(sent_list, words_set, word2idx, freq=100):\n",
    "    words_freq = []\n",
    "    for w in words_set:\n",
    "        if w[-1] >100:\n",
    "            words_freq.append(w[0])\n",
    "    print(len(words_freq), words_freq[0])\n",
    "    w2sents = {}\n",
    "    w2sentnums = {}\n",
    "    for w in words_freq:\n",
    "        w2sents[w] = []\n",
    "        w2sentnums[w] = []\n",
    "        for s in sent_list:\n",
    "            if w in s:\n",
    "                w2sents[w].append(s)\n",
    "                w2sentnums[w].append([word2idx[word] for word in s])\n",
    "    return words_freq, w2sents, w2sentnums        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "661 the\n"
     ]
    }
   ],
   "source": [
    "words_freq, w2sents, w2sentnums= word2sentences(train_sentences, words_set, word2idx, freq=100) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19892"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(w2sents['the'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19892"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(w2sentnums['the'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_attentions(sentence, word_embedding, u, word2idx):\n",
    "    \"\"\"计算一个句子中每个单词在句子中的Attention，返回单词与Attention值的字典\"\"\"\n",
    "    num_sentence = [word2idx[w] for w in sentence]\n",
    "    s_embed = word_embedding[num_sentence]\n",
    "    u = u.repeat(s_embed.size(0), 1)\n",
    "    score = torch.cosine_similarity(s_embed, u, dim=1)\n",
    "    attn = torch.softmax(score, dim=0)\n",
    "    return {w:a for w,a in zip(sentence, attn.tolist()) }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = torch.load(model_name)\n",
    "word_embedding = model.get_embed_weight()   # 注意多GPU的时候中间加上module\n",
    "u = model.get_u()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_word_sentens_attn_dic(w2sents, word_embedding, u, word2idx):\n",
    "    word_sentens_attn_dic = {}\n",
    "    word_attention_li = {}\n",
    "    for word, sent_list in w2sents.items():\n",
    "        word_sentens_attn_dic[word] = []\n",
    "        word_attention_li[word] = []\n",
    "        for sentence in sent_list:\n",
    "            dic = get_attentions(sentence, word_embedding, u, word2idx)\n",
    "            word_sentens_attn_dic[word].append(dic)\n",
    "            word_attention_li[word].append(dic[word])\n",
    "    return word_sentens_attn_dic, word_attention_li"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "word_sentens_attn_dic, word_attention_li = get_word_sentens_attn_dic(w2sents, word_embedding, u, word2idx)  # 这步很慢"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "def meam_std_list(word_attention_li):\n",
    "    \"\"\"计算Attentions的平均值和标准差，并按标准差排序\"\"\"\n",
    "    word_mean_std_li = []\n",
    "    for w in word_attention_li:\n",
    "        arr = np.array(word_attention_li[w])\n",
    "        word_mean_std_li.append((w, arr.mean(), arr.std()))\n",
    "    word_mean_std_li = sorted(word_mean_std_li, key=lambda x:x[2], reverse=True)\n",
    "    return word_mean_std_li"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0.1190217062830925,\n",
       " 0.0888654813170433,\n",
       " 0.06562710553407669,\n",
       " 0.04432917386293411,\n",
       " 0.10898833721876144,\n",
       " 0.2384878396987915,\n",
       " 0.10173339396715164,\n",
       " 0.10063987225294113,\n",
       " 0.03695022687315941,\n",
       " 0.05876564234495163,\n",
       " 0.1934322565793991,\n",
       " 0.10889307409524918,\n",
       " 0.03232577070593834,\n",
       " 0.11339320987462997,\n",
       " 0.03404928743839264,\n",
       " 0.11830116808414459,\n",
       " 0.2688466012477875,\n",
       " 0.06260889768600464,\n",
       " 0.02309643104672432,\n",
       " 0.06964331865310669,\n",
       " 0.03861911594867706,\n",
       " 0.13636240363121033,\n",
       " 0.12204936146736145,\n",
       " 0.06669215857982635,\n",
       " 0.024942168965935707,\n",
       " 0.03721117228269577,\n",
       " 0.03583051636815071,\n",
       " 0.03584276884794235,\n",
       " 0.030492568388581276,\n",
       " 0.049663230776786804,\n",
       " 0.05735832452774048,\n",
       " 0.017051974311470985,\n",
       " 0.03523822873830795,\n",
       " 0.03202565386891365,\n",
       " 0.04725823923945427,\n",
       " 0.12893228232860565,\n",
       " 0.05369587987661362,\n",
       " 0.059402815997600555,\n",
       " 0.03468054160475731,\n",
       " 0.05395803973078728,\n",
       " 0.03539625182747841,\n",
       " 0.2015077769756317,\n",
       " 0.09085898846387863,\n",
       " 0.028005624189972878,\n",
       " 0.0667819231748581,\n",
       " 0.04037237539887428,\n",
       " 0.18131670355796814,\n",
       " 0.05987803265452385,\n",
       " 0.05780469998717308,\n",
       " 0.02719230204820633,\n",
       " 0.030423060059547424,\n",
       " 0.05053051561117172,\n",
       " 0.06877638399600983,\n",
       " 0.05907575041055679,\n",
       " 0.021351618692278862,\n",
       " 0.11394309997558594,\n",
       " 0.053594399243593216,\n",
       " 0.10110218822956085,\n",
       " 0.13501515984535217,\n",
       " 0.050411127507686615,\n",
       " 0.03422318026423454,\n",
       " 0.2536636292934418,\n",
       " 0.061379726976156235,\n",
       " 0.09928023815155029,\n",
       " 0.04747406020760536,\n",
       " 0.10411464422941208,\n",
       " 0.5169481039047241,\n",
       " 0.06736201047897339,\n",
       " 0.15860888361930847,\n",
       " 0.05372478440403938,\n",
       " 0.11371945589780807,\n",
       " 0.03602738305926323,\n",
       " 0.18691308796405792,\n",
       " 0.10293374955654144,\n",
       " 0.06345084309577942,\n",
       " 0.06563040614128113,\n",
       " 0.04486239701509476,\n",
       " 0.02011091262102127,\n",
       " 0.10817983746528625,\n",
       " 0.07393988221883774,\n",
       " 0.17515778541564941,\n",
       " 0.07506236433982849,\n",
       " 0.12377643585205078,\n",
       " 0.06288991868495941,\n",
       " 0.052632614970207214,\n",
       " 0.11619420349597931,\n",
       " 0.03847669064998627,\n",
       " 0.07587035745382309,\n",
       " 0.311026930809021,\n",
       " 0.08157555013895035,\n",
       " 0.16591787338256836,\n",
       " 0.02459457516670227,\n",
       " 0.1466558277606964,\n",
       " 0.02945157326757908,\n",
       " 0.03548036888241768,\n",
       " 0.03053637407720089,\n",
       " 0.06344855576753616,\n",
       " 0.0557742603123188,\n",
       " 0.03470824286341667,\n",
       " 0.13192571699619293,\n",
       " 0.037928856909275055,\n",
       " 0.044478949159383774,\n",
       " 0.06271667778491974,\n",
       " 0.07074806839227676,\n",
       " 0.03992407023906708,\n",
       " 0.10563419759273529,\n",
       " 0.16265082359313965,\n",
       " 0.030746934935450554,\n",
       " 0.06765319406986237,\n",
       " 0.18624959886074066,\n",
       " 0.03647158667445183,\n",
       " 0.05157794430851936,\n",
       " 0.07289182394742966,\n",
       " 0.08590467274188995,\n",
       " 0.037205882370471954,\n",
       " 0.0257713682949543,\n",
       " 0.05212021619081497,\n",
       " 0.21074552834033966,\n",
       " 0.06060027331113815,\n",
       " 0.19964373111724854,\n",
       " 0.16401366889476776,\n",
       " 0.028099291026592255,\n",
       " 0.09699218720197678,\n",
       " 0.1128750592470169,\n",
       " 0.153528094291687,\n",
       " 0.11045274883508682,\n",
       " 0.09178707748651505,\n",
       " 0.055407024919986725,\n",
       " 0.04496483877301216,\n",
       " 0.03719280660152435,\n",
       " 0.09319733083248138,\n",
       " 0.07012350112199783,\n",
       " 0.03973418101668358,\n",
       " 0.04156848043203354,\n",
       " 0.1707957535982132,\n",
       " 0.03374147415161133,\n",
       " 0.1648533046245575,\n",
       " 0.06694913655519485,\n",
       " 0.04881034791469574,\n",
       " 0.044650476425886154,\n",
       " 0.1442776620388031,\n",
       " 0.09604538232088089,\n",
       " 0.06381257623434067,\n",
       " 0.26622605323791504,\n",
       " 0.030317753553390503,\n",
       " 0.22371108829975128,\n",
       " 0.08151829242706299,\n",
       " 0.23407316207885742,\n",
       " 0.029585257172584534,\n",
       " 0.06266219913959503,\n",
       " 0.26549261808395386,\n",
       " 0.06774194538593292,\n",
       " 0.02396443672478199,\n",
       " 0.03396550565958023,\n",
       " 0.0450105257332325,\n",
       " 0.04149557277560234,\n",
       " 0.04695688188076019,\n",
       " 0.05122069641947746,\n",
       " 0.02033059298992157,\n",
       " 0.08892922848463058,\n",
       " 0.03899766132235527,\n",
       " 0.06255039572715759,\n",
       " 0.0334644615650177,\n",
       " 0.2850794494152069,\n",
       " 0.04772552102804184,\n",
       " 0.1060880646109581,\n",
       " 0.088255375623703,\n",
       " 0.06618666648864746,\n",
       " 0.061119891703128815,\n",
       " 0.03122597560286522,\n",
       " 0.03851215913891792,\n",
       " 0.045152127742767334,\n",
       " 0.04264447093009949,\n",
       " 0.0540623739361763,\n",
       " 0.06660711020231247,\n",
       " 0.04495788365602493,\n",
       " 0.03237925469875336,\n",
       " 0.052003130316734314,\n",
       " 0.04870743304491043,\n",
       " 0.03287087008357048,\n",
       " 0.1530287265777588,\n",
       " 0.06592223048210144,\n",
       " 0.11171049624681473,\n",
       " 0.0700758621096611,\n",
       " 0.09055113047361374,\n",
       " 0.14344580471515656,\n",
       " 0.02901759371161461,\n",
       " 0.04834083467721939,\n",
       " 0.037656188011169434,\n",
       " 0.10242567211389542,\n",
       " 0.1825432926416397,\n",
       " 0.19612830877304077,\n",
       " 0.06464529037475586,\n",
       " 0.09119366854429245,\n",
       " 0.028015658259391785,\n",
       " 0.07851945608854294,\n",
       " 0.10461153090000153,\n",
       " 0.14327417314052582,\n",
       " 0.049767717719078064,\n",
       " 0.09648337215185165,\n",
       " 0.015945641323924065,\n",
       " 0.024270113557577133,\n",
       " 0.07491869479417801,\n",
       " 0.04213240370154381,\n",
       " 0.04413394257426262,\n",
       " 0.039152756333351135,\n",
       " 0.05446626991033554,\n",
       " 0.019847305491566658,\n",
       " 0.05792763829231262,\n",
       " 0.024212120100855827,\n",
       " 0.03902474045753479,\n",
       " 0.06913848966360092,\n",
       " 0.029697084799408913,\n",
       " 0.026168061420321465,\n",
       " 0.08621005713939667,\n",
       " 0.07626429200172424,\n",
       " 0.031165048480033875,\n",
       " 0.06632594019174576,\n",
       " 0.24267765879631042,\n",
       " 0.1918545812368393,\n",
       " 0.10814889520406723,\n",
       " 0.10232631117105484,\n",
       " 0.07839473336935043,\n",
       " 0.02742159366607666,\n",
       " 0.15463294088840485,\n",
       " 0.023295704275369644,\n",
       " 0.02080385573208332,\n",
       " 0.02996056340634823,\n",
       " 0.04610548913478851,\n",
       " 0.09957515448331833,\n",
       " 0.04806463047862053,\n",
       " 0.08573813736438751,\n",
       " 0.04728425294160843,\n",
       " 0.18879355490207672,\n",
       " 0.033618222922086716,\n",
       " 0.07075203955173492,\n",
       " 0.1164277046918869,\n",
       " 0.3264194130897522,\n",
       " 0.1723061501979828,\n",
       " 0.10902751237154007,\n",
       " 0.19211715459823608,\n",
       " 0.09303739666938782,\n",
       " 0.04016238823533058,\n",
       " 0.049924347549676895,\n",
       " 0.25351372361183167,\n",
       " 0.09723994135856628,\n",
       " 0.04845902696251869,\n",
       " 0.07687260955572128,\n",
       " 0.2092534452676773,\n",
       " 0.05441931262612343,\n",
       " 0.04299810156226158,\n",
       " 0.2191939651966095,\n",
       " 0.040391575545072556,\n",
       " 0.03540709614753723,\n",
       " 0.0642881914973259,\n",
       " 0.05204727128148079,\n",
       " 0.10873828828334808,\n",
       " 0.07282879948616028,\n",
       " 0.09578656405210495,\n",
       " 0.03383275493979454,\n",
       " 0.05712180212140083,\n",
       " 0.039114102721214294,\n",
       " 0.033581968396902084,\n",
       " 0.1568588763475418,\n",
       " 0.06475626677274704,\n",
       " 0.02475585602223873,\n",
       " 0.41876330971717834,\n",
       " 0.03612890839576721,\n",
       " 0.11657785624265671,\n",
       " 0.05431724339723587,\n",
       " 0.16770441830158234,\n",
       " 0.030350498855113983,\n",
       " 0.13726596534252167,\n",
       " 0.06324649602174759,\n",
       " 0.07860378175973892,\n",
       " 0.12398781627416611,\n",
       " 0.021274574100971222,\n",
       " 0.022703271359205246,\n",
       " 0.048349566757678986,\n",
       " 0.11050061881542206,\n",
       " 0.06524597853422165,\n",
       " 0.0734436884522438,\n",
       " 0.149702787399292,\n",
       " 0.03898262977600098,\n",
       " 0.06958620995283127,\n",
       " 0.024460135027766228,\n",
       " 0.027979765087366104,\n",
       " 0.127155140042305,\n",
       " 0.03166554495692253,\n",
       " 0.13801684975624084,\n",
       " 0.038323547691106796,\n",
       " 0.06787386536598206,\n",
       " 0.035622257739305496,\n",
       " 0.10033565759658813,\n",
       " 0.0311440359801054,\n",
       " 0.02600337564945221,\n",
       " 0.10641063749790192,\n",
       " 0.03634757176041603,\n",
       " 0.04158780351281166,\n",
       " 0.037957508116960526,\n",
       " 0.03828288987278938,\n",
       " 0.030446473509073257,\n",
       " 0.05256654694676399,\n",
       " 0.103183314204216,\n",
       " 0.07303179055452347,\n",
       " 0.0567825585603714,\n",
       " 0.04656092822551727,\n",
       " 0.17782475054264069,\n",
       " 0.05894114822149277,\n",
       " 0.037706296890974045,\n",
       " 0.05997772887349129,\n",
       " 0.09483397006988525,\n",
       " 0.09648361057043076,\n",
       " 0.058775644749403,\n",
       " 0.025263430550694466,\n",
       " 0.03559737652540207,\n",
       " 0.03718835487961769,\n",
       " 0.037433210760354996,\n",
       " 0.08024080842733383,\n",
       " 0.0329367034137249,\n",
       " 0.03249708190560341,\n",
       " 0.10160797089338303,\n",
       " 0.07969211041927338,\n",
       " 0.06215725839138031,\n",
       " 0.17166589200496674,\n",
       " 0.14642252027988434,\n",
       " 0.10035371780395508,\n",
       " 0.05296153202652931,\n",
       " 0.056795310229063034,\n",
       " 0.612654983997345,\n",
       " 0.09611662477254868,\n",
       " 0.06100054457783699,\n",
       " 0.031606439501047134,\n",
       " 0.02365294098854065,\n",
       " 0.022931382060050964,\n",
       " 0.06888768076896667,\n",
       " 0.08355206996202469,\n",
       " 0.08406144380569458,\n",
       " 0.042591702193021774,\n",
       " 0.05930786579847336,\n",
       " 0.05367044359445572,\n",
       " 0.06695730984210968,\n",
       " 0.14204104244709015,\n",
       " 0.06348727643489838,\n",
       " 0.05567392706871033,\n",
       " 0.06174761801958084,\n",
       " 0.033453695476055145,\n",
       " 0.03891436755657196,\n",
       " 0.020075727254152298,\n",
       " 0.0414951890707016,\n",
       " 0.032506175339221954,\n",
       " 0.06736794114112854,\n",
       " 0.10158678144216537,\n",
       " 0.09006553888320923,\n",
       " 0.04150666669011116,\n",
       " 0.02684030868113041,\n",
       " 0.025398092344403267,\n",
       " 0.1797437220811844,\n",
       " 0.04471973702311516,\n",
       " 0.021255958825349808,\n",
       " 0.03510395810008049,\n",
       " 0.03382451832294464,\n",
       " 0.10007128119468689,\n",
       " 0.04526577144861221,\n",
       " 0.026842957362532616,\n",
       " 0.08270543068647385,\n",
       " 0.10471946746110916,\n",
       " 0.057283055037260056,\n",
       " 0.07271410524845123,\n",
       " 0.04449273645877838,\n",
       " 0.13745062053203583,\n",
       " 0.12911921739578247,\n",
       " 0.14784403145313263,\n",
       " 0.09329920262098312,\n",
       " 0.1216944083571434,\n",
       " 0.027337318286299706,\n",
       " 0.02396107278764248,\n",
       " 0.042377181351184845,\n",
       " 0.062158361077308655,\n",
       " 0.04446171969175339,\n",
       " 0.04728274047374725,\n",
       " 0.05980304628610611,\n",
       " 0.02914244867861271,\n",
       " 0.06978300213813782,\n",
       " 0.04375814273953438,\n",
       " 0.20036965608596802,\n",
       " 0.07997465878725052,\n",
       " 0.06835726648569107,\n",
       " 0.041683170944452286,\n",
       " 0.030514631420373917,\n",
       " 0.026652103289961815,\n",
       " 0.17441785335540771,\n",
       " 0.05094584450125694,\n",
       " 0.021957408636808395,\n",
       " 0.2710874676704407,\n",
       " 0.24634550511837006,\n",
       " 0.07649432867765427,\n",
       " 0.2960168421268463,\n",
       " 0.10140511393547058,\n",
       " 0.14266598224639893,\n",
       " 0.05118827149271965,\n",
       " 0.08693334460258484,\n",
       " 0.06925912201404572,\n",
       " 0.029692446812987328,\n",
       " 0.10609026998281479,\n",
       " 0.08814039081335068,\n",
       " 0.026836788281798363,\n",
       " 0.05979667603969574,\n",
       " 0.12237458676099777,\n",
       " 0.06713636964559555,\n",
       " 0.05575822293758392,\n",
       " 0.0751352459192276,\n",
       " 0.05496926233172417,\n",
       " 0.13758447766304016,\n",
       " 0.046654775738716125,\n",
       " 0.03589659184217453,\n",
       " 0.11613260954618454,\n",
       " 0.06733033806085587,\n",
       " 0.03474242240190506,\n",
       " 0.029289567843079567,\n",
       " 0.11257215589284897,\n",
       " 0.034357309341430664,\n",
       " 0.037685949355363846,\n",
       " 0.0689353346824646,\n",
       " 0.1257118135690689,\n",
       " 0.04883013293147087,\n",
       " 0.08084968477487564,\n",
       " 0.04231289029121399,\n",
       " 0.09856132417917252,\n",
       " 0.12087294459342957,\n",
       " 0.10143931210041046,\n",
       " 0.04290332645177841,\n",
       " 0.062068067491054535,\n",
       " 0.10409357398748398,\n",
       " 0.052931539714336395,\n",
       " 0.07590798288583755,\n",
       " 0.09673279523849487,\n",
       " 0.03474681079387665,\n",
       " 0.05684053152799606,\n",
       " 0.20651596784591675,\n",
       " 0.05751960352063179,\n",
       " 0.0388704277575016,\n",
       " 0.052278485149145126,\n",
       " 0.21900348365306854,\n",
       " 0.10898563265800476,\n",
       " 0.06644424796104431,\n",
       " 0.024152887985110283,\n",
       " 0.024472199380397797,\n",
       " 0.14489804208278656,\n",
       " 0.11115849763154984,\n",
       " 0.16496263444423676,\n",
       " 0.054227616637945175,\n",
       " 0.038841359317302704,\n",
       " 0.050011854618787766,\n",
       " 0.07934903353452682,\n",
       " 0.045532867312431335,\n",
       " 0.03091382049024105,\n",
       " 0.04723147675395012,\n",
       " 0.02147972211241722,\n",
       " 0.1578952819108963,\n",
       " 0.05623535439372063,\n",
       " 0.08519773930311203,\n",
       " 0.09720073640346527,\n",
       " 0.05237424001097679,\n",
       " 0.042325709015131,\n",
       " 0.027779225260019302,\n",
       " 0.02864629216492176,\n",
       " 0.02926797792315483,\n",
       " 0.048948630690574646,\n",
       " 0.053239062428474426,\n",
       " 0.1079203188419342,\n",
       " 0.029218783602118492,\n",
       " 0.07322125136852264,\n",
       " 0.10054854303598404,\n",
       " 0.0599575899541378,\n",
       " 0.09058400243520737,\n",
       " 0.08807015419006348,\n",
       " 0.12061145901679993,\n",
       " 0.04694662243127823,\n",
       " 0.08164673298597336,\n",
       " 0.06242657080292702,\n",
       " 0.039607975631952286,\n",
       " 0.07213323563337326,\n",
       " 0.08763103932142258,\n",
       " 0.36704784631729126,\n",
       " 0.19707000255584717,\n",
       " 0.050492268055677414,\n",
       " 0.07695997506380081,\n",
       " 0.03952956944704056,\n",
       " 0.017810743302106857,\n",
       " 0.06521592289209366,\n",
       " 0.02978295460343361,\n",
       " 0.055338699370622635,\n",
       " 0.06651680916547775,\n",
       " 0.43312087655067444,\n",
       " 0.0465678796172142,\n",
       " 0.11148732900619507,\n",
       " 0.10537445545196533,\n",
       " 0.12400811910629272,\n",
       " 0.03807182237505913,\n",
       " 0.036441538482904434,\n",
       " 0.05027085170149803,\n",
       " 0.06196950003504753,\n",
       " 0.04203026741743088,\n",
       " 0.1277773231267929,\n",
       " 0.03500267118215561,\n",
       " 0.05122890695929527,\n",
       " 0.0615219920873642,\n",
       " 0.06823304295539856,\n",
       " 0.10229797661304474,\n",
       " 0.05532129108905792,\n",
       " 0.16119715571403503,\n",
       " 0.04524514824151993,\n",
       " 0.04730745032429695,\n",
       " 0.037781454622745514,\n",
       " 0.0650399923324585,\n",
       " 0.07493326812982559,\n",
       " 0.024397533386945724,\n",
       " 0.06201842054724693,\n",
       " 0.05733151733875275,\n",
       " 0.02287258766591549,\n",
       " 0.029992271214723587,\n",
       " 0.0939626395702362,\n",
       " 0.032379694283008575,\n",
       " 0.06151549890637398,\n",
       " 0.23052428662776947,\n",
       " 0.06615308672189713,\n",
       " 0.04075095057487488,\n",
       " 0.06179390475153923,\n",
       " 0.04788685962557793,\n",
       " 0.1022413820028305,\n",
       " 0.056595299392938614,\n",
       " 0.10720273107290268,\n",
       " 0.14514988660812378,\n",
       " 0.057095155119895935,\n",
       " 0.18891212344169617,\n",
       " 0.06199021637439728,\n",
       " 0.17338153719902039,\n",
       " 0.27672380208969116,\n",
       " 0.07017537951469421,\n",
       " 0.037071358412504196,\n",
       " 0.08029317110776901,\n",
       " 0.046313583850860596,\n",
       " 0.03629003092646599,\n",
       " 0.13619562983512878,\n",
       " 0.07153468579053879,\n",
       " 0.028945429250597954,\n",
       " 0.05423334613442421,\n",
       " 0.09934449195861816,\n",
       " 0.061402883380651474,\n",
       " 0.04495488479733467,\n",
       " 0.020015913993120193,\n",
       " 0.08921083062887192,\n",
       " 0.03637588396668434,\n",
       " 0.16574226319789886,\n",
       " 0.03518042340874672,\n",
       " 0.1232433021068573,\n",
       " 0.05054579675197601,\n",
       " 0.05714532732963562,\n",
       " 0.12758584320545197,\n",
       " 0.05928238108754158,\n",
       " 0.09040574729442596,\n",
       " 0.045560263097286224,\n",
       " 0.06065494939684868,\n",
       " 0.0639016181230545,\n",
       " 0.06216944754123688,\n",
       " 0.051946718245744705,\n",
       " 0.07809852063655853,\n",
       " 0.08668864518404007,\n",
       " 0.059763938188552856,\n",
       " 0.11776655167341232,\n",
       " 0.02165316604077816,\n",
       " 0.04879682883620262,\n",
       " 0.1077425479888916,\n",
       " 0.04762463644146919,\n",
       " 0.03258078172802925,\n",
       " 0.2842461168766022,\n",
       " 0.15722881257534027,\n",
       " 0.08321468532085419,\n",
       " 0.08282601833343506,\n",
       " 0.11945679038763046,\n",
       " 0.05725580081343651,\n",
       " 0.09673463553190231,\n",
       " 0.09139511734247208,\n",
       " 0.06647444516420364,\n",
       " 0.021132294088602066,\n",
       " 0.02373507246375084,\n",
       " 0.03873834386467934,\n",
       " 0.03577394783496857,\n",
       " 0.10084836930036545,\n",
       " 0.09320175647735596,\n",
       " 0.03343222662806511,\n",
       " 0.09565657377243042,\n",
       " 0.055659353733062744,\n",
       " 0.13756383955478668,\n",
       " 0.04973261430859566,\n",
       " 0.07952773571014404,\n",
       " 0.0373360700905323,\n",
       " 0.06799793243408203,\n",
       " 0.18782731890678406,\n",
       " 0.12481880187988281,\n",
       " 0.08948549628257751,\n",
       " 0.07691624760627747,\n",
       " 0.2143297642469406,\n",
       " 0.24471847712993622,\n",
       " 0.13806290924549103,\n",
       " 0.08955729007720947,\n",
       " 0.04822927713394165,\n",
       " 0.05488996207714081,\n",
       " 0.05147157981991768,\n",
       " 0.11493594944477081,\n",
       " 0.27329903841018677,\n",
       " 0.038257185369729996,\n",
       " 0.1911698281764984,\n",
       " 0.0374474823474884,\n",
       " 0.42283785343170166,\n",
       " 0.18635936081409454,\n",
       " 0.07980402559041977,\n",
       " 0.04040278121829033,\n",
       " 0.17801012098789215,\n",
       " 0.021549591794610023,\n",
       " 0.047216035425662994,\n",
       " 0.036992285400629044,\n",
       " 0.02616749331355095,\n",
       " 0.20294639468193054,\n",
       " 0.05861371010541916,\n",
       " 0.042352236807346344,\n",
       " 0.04433530569076538,\n",
       " 0.07780134677886963,\n",
       " 0.1884113997220993,\n",
       " 0.039914168417453766,\n",
       " 0.0670749768614769,\n",
       " 0.043691087514162064,\n",
       " 0.05375536158680916,\n",
       " 0.11839774250984192,\n",
       " 0.13607870042324066,\n",
       " 0.13213743269443512,\n",
       " 0.06453520804643631,\n",
       " 0.07444753497838974,\n",
       " 0.04670301824808121,\n",
       " 0.07157900184392929,\n",
       " 0.338763952255249,\n",
       " 0.0301892701536417,\n",
       " 0.11264295130968094,\n",
       " 0.04852312058210373,\n",
       " 0.028430629521608353,\n",
       " 0.058528438210487366,\n",
       " 0.11265045404434204,\n",
       " 0.1392105519771576,\n",
       " 0.030259858816862106,\n",
       " 0.059427566826343536,\n",
       " 0.02608046866953373,\n",
       " 0.02269883267581463,\n",
       " 0.08451254665851593,\n",
       " 0.045840464532375336,\n",
       " 0.2491520643234253,\n",
       " 0.08189588785171509,\n",
       " 0.12396872788667679,\n",
       " 0.04240015894174576,\n",
       " 0.0513860359787941,\n",
       " 0.12833380699157715,\n",
       " 0.028289228677749634,\n",
       " 0.05517812818288803,\n",
       " 0.08532628417015076,\n",
       " 0.06375166028738022,\n",
       " 0.10383864492177963,\n",
       " 0.03583001345396042,\n",
       " 0.2652103304862976,\n",
       " 0.11617652326822281,\n",
       " 0.18399181962013245,\n",
       " 0.032059602439403534,\n",
       " 0.031177954748272896,\n",
       " 0.09739363938570023,\n",
       " 0.03539005666971207,\n",
       " 0.018924688920378685,\n",
       " 0.02270606905221939,\n",
       " 0.13008300960063934,\n",
       " 0.12139811366796494,\n",
       " 0.046806588768959045,\n",
       " 0.07612582296133041,\n",
       " 0.13960519433021545,\n",
       " 0.03722599148750305,\n",
       " 0.07501398772001266,\n",
       " 0.08636559545993805,\n",
       " 0.11962509900331497,\n",
       " 0.1232992559671402,\n",
       " 0.040459275245666504,\n",
       " 0.11167016625404358,\n",
       " 0.050891440361738205,\n",
       " 0.08285205811262131,\n",
       " 0.0449562706053257,\n",
       " 0.07763224840164185,\n",
       " 0.14657843112945557,\n",
       " 0.03163966163992882,\n",
       " 0.09385009855031967,\n",
       " 0.05724480748176575,\n",
       " 0.12611329555511475,\n",
       " 0.024767419323325157,\n",
       " 0.06178374961018562,\n",
       " 0.03997806832194328,\n",
       " 0.027165589854121208,\n",
       " 0.06513053923845291,\n",
       " 0.050190653651952744,\n",
       " 0.04496728628873825,\n",
       " 0.05325282737612724,\n",
       " 0.019831562414765358,\n",
       " 0.03136029466986656,\n",
       " 0.03060004860162735,\n",
       " 0.09297516196966171,\n",
       " 0.04592094197869301,\n",
       " 0.024548077955842018,\n",
       " 0.14033615589141846,\n",
       " 0.045503392815589905,\n",
       " 0.07301647216081619,\n",
       " 0.05543583258986473,\n",
       " 0.0221114419400692,\n",
       " 0.6265426874160767,\n",
       " 0.03140518441796303,\n",
       " 0.12650227546691895,\n",
       " 0.06702972948551178,\n",
       " 0.16972941160202026,\n",
       " 0.15718790888786316,\n",
       " 0.03349948301911354,\n",
       " 0.08135166019201279,\n",
       " 0.055221278220415115,\n",
       " 0.14982888102531433,\n",
       " 0.03489070013165474,\n",
       " 0.03052414022386074,\n",
       " 0.03753295913338661,\n",
       " 0.02294224500656128,\n",
       " 0.06025979667901993,\n",
       " 0.18139444291591644,\n",
       " 0.1279875636100769,\n",
       " 0.019069697707891464,\n",
       " 0.0267944373190403,\n",
       " 0.1770668476819992,\n",
       " 0.1793706715106964,\n",
       " 0.09214803576469421,\n",
       " 0.04147719964385033,\n",
       " 0.09642037749290466,\n",
       " 0.0588296540081501,\n",
       " 0.07551804184913635,\n",
       " 0.02806948311626911,\n",
       " 0.03908955305814743,\n",
       " 0.040883976966142654,\n",
       " 0.045370109379291534,\n",
       " 0.053778745234012604,\n",
       " 0.037208572030067444,\n",
       " 0.03763310983777046,\n",
       " 0.05172067880630493,\n",
       " 0.11758095771074295,\n",
       " 0.03661714494228363,\n",
       " 0.041850004345178604,\n",
       " 0.17033270001411438,\n",
       " 0.5737029910087585,\n",
       " 0.053698260337114334,\n",
       " 0.12936334311962128,\n",
       " 0.061049021780490875,\n",
       " 0.028339093551039696,\n",
       " 0.07561339437961578,\n",
       " 0.033847272396087646,\n",
       " 0.05946680158376694,\n",
       " 0.10608530044555664,\n",
       " 0.057932980358600616,\n",
       " 0.07907281816005707,\n",
       " 0.06371144205331802,\n",
       " 0.06025350093841553,\n",
       " 0.09854678809642792,\n",
       " 0.02916502021253109,\n",
       " 0.13957583904266357,\n",
       " 0.11364433169364929,\n",
       " 0.039870355278253555,\n",
       " 0.07696317881345749,\n",
       " 0.03643231838941574,\n",
       " 0.06038641929626465,\n",
       " 0.04953371360898018,\n",
       " 0.1655670553445816,\n",
       " 0.08648756891489029,\n",
       " 0.0566597543656826,\n",
       " 0.10527636855840683,\n",
       " 0.16847766935825348,\n",
       " 0.06840163469314575,\n",
       " 0.02945428527891636,\n",
       " 0.06192723289132118,\n",
       " 0.1478622704744339,\n",
       " 0.0722048208117485,\n",
       " 0.06298573315143585,\n",
       " 0.07516064494848251,\n",
       " 0.2604660093784332,\n",
       " 0.30824390053749084,\n",
       " 0.033967070281505585,\n",
       " 0.0966741070151329,\n",
       " 0.042531151324510574,\n",
       " 0.029532575979828835,\n",
       " 0.058474794030189514,\n",
       " 0.021080605685710907,\n",
       " 0.07856316864490509,\n",
       " 0.09532849490642548,\n",
       " 0.22755971550941467,\n",
       " 0.034103792160749435,\n",
       " 0.1438179612159729,\n",
       " 0.024848632514476776,\n",
       " 0.037472158670425415,\n",
       " 0.018815329298377037,\n",
       " 0.0568460114300251,\n",
       " 0.05384759604930878,\n",
       " 0.12004279345273972,\n",
       " 0.032671503722667694,\n",
       " 0.08404472470283508,\n",
       " 0.024228142574429512,\n",
       " 0.05024796351790428,\n",
       " 0.06648421287536621,\n",
       " 0.08278580009937286,\n",
       " 0.06827743351459503,\n",
       " 0.04952514171600342,\n",
       " 0.4017587900161743,\n",
       " 0.03897494450211525,\n",
       " 0.09109407663345337,\n",
       " 0.3763371706008911,\n",
       " 0.21625541150569916,\n",
       " 0.5029148459434509,\n",
       " 0.19662372767925262,\n",
       " 0.1347530633211136,\n",
       " 0.06841906905174255,\n",
       " 0.0971040353178978,\n",
       " 0.03509891405701637,\n",
       " 0.1628592312335968,\n",
       " 0.06142311915755272,\n",
       " 0.06586005538702011,\n",
       " 0.04480540007352829,\n",
       " 0.05868183821439743,\n",
       " 0.0419393815100193,\n",
       " 0.03379281610250473,\n",
       " 0.13919095695018768,\n",
       " 0.02525876834988594,\n",
       " 0.11987438052892685,\n",
       " 0.08026784658432007,\n",
       " 0.13593152165412903,\n",
       " 0.36814039945602417,\n",
       " 0.09495224058628082,\n",
       " 0.05721588805317879,\n",
       " 0.05046723783016205,\n",
       " 0.10939318686723709,\n",
       " 0.049867983907461166,\n",
       " 0.038386426866054535,\n",
       " 0.06744446605443954,\n",
       " 0.09973515570163727,\n",
       " 0.03663421422243118,\n",
       " 0.13550259172916412,\n",
       " 0.018425723537802696,\n",
       " 0.15455536544322968,\n",
       " 0.06755463778972626,\n",
       " 0.020557375624775887,\n",
       " 0.04286770150065422,\n",
       " 0.08751440793275833,\n",
       " 0.05411035194993019,\n",
       " 0.2547513544559479,\n",
       " 0.10548006743192673,\n",
       " 0.0553574338555336,\n",
       " 0.037535957992076874,\n",
       " 0.041605666279792786,\n",
       " 0.10682918131351471,\n",
       " 0.1383773684501648,\n",
       " 0.03680591285228729,\n",
       " 0.07151277363300323,\n",
       " 0.14258208870887756,\n",
       " 0.03216893970966339,\n",
       " 0.036895815283060074,\n",
       " 0.020616913214325905,\n",
       " 0.06479767709970474,\n",
       " 0.09668432921171188,\n",
       " 0.044976625591516495,\n",
       " 0.05411948263645172,\n",
       " 0.018028339371085167,\n",
       " 0.03899696469306946,\n",
       " 0.04694199562072754,\n",
       " 0.02461916022002697,\n",
       " 0.03189811110496521,\n",
       " 0.08907338976860046,\n",
       " 0.1375758796930313,\n",
       " 0.12521083652973175,\n",
       " 0.11718465387821198,\n",
       " 0.2725251019001007,\n",
       " 0.058143630623817444,\n",
       " 0.12151399254798889,\n",
       " 0.1284765899181366,\n",
       " 0.13175520300865173,\n",
       " 0.08789083361625671,\n",
       " 0.02928311377763748,\n",
       " 0.09605032950639725,\n",
       " 0.05553583800792694,\n",
       " 0.05359324440360069,\n",
       " 0.17190039157867432,\n",
       " 0.043378885835409164,\n",
       " 0.05734424665570259,\n",
       " 0.13160184025764465,\n",
       " 0.07879728078842163,\n",
       " 0.05319330096244812,\n",
       " 0.09009312093257904,\n",
       " 0.03154662996530533,\n",
       " 0.04068861901760101,\n",
       " 0.2433079034090042,\n",
       " 0.08300592750310898,\n",
       " 0.038297057151794434,\n",
       " 0.031096117570996284,\n",
       " 0.09350099414587021,\n",
       " 0.05104351416230202,\n",
       " 0.3151170611381531,\n",
       " 0.05532943457365036,\n",
       " 0.03205609321594238,\n",
       " 0.0830257311463356,\n",
       " 0.023052101954817772,\n",
       " 0.0322871059179306,\n",
       " 0.04687163233757019,\n",
       " 0.040724292397499084,\n",
       " 0.05738189443945885,\n",
       " 0.1914212554693222,\n",
       " 0.05997275188565254,\n",
       " 0.02160460688173771,\n",
       " 0.08172950893640518,\n",
       " 0.176561176776886,\n",
       " 0.10342389345169067,\n",
       " 0.03590356186032295,\n",
       " 0.028980424627661705,\n",
       " 0.08463802188634872,\n",
       " 0.03462473675608635,\n",
       " 0.09696123003959656,\n",
       " 0.0948820561170578,\n",
       " 0.030164236202836037,\n",
       " 0.08163236081600189,\n",
       " 0.09079662710428238,\n",
       " 0.08527382463216782,\n",
       " 0.03231038525700569,\n",
       " 0.1413751095533371,\n",
       " 0.05265148729085922,\n",
       " 0.04552474990487099,\n",
       " 0.04973987117409706,\n",
       " 0.14013969898223877,\n",
       " 0.031088031828403473,\n",
       " 0.19475050270557404,\n",
       " 0.16975688934326172,\n",
       " 0.16272719204425812,\n",
       " 0.04928617924451828,\n",
       " 0.051445577293634415,\n",
       " 0.06299855560064316,\n",
       " 0.07096774876117706,\n",
       " 0.0978393629193306,\n",
       " 0.02766236662864685,\n",
       " 0.12742745876312256,\n",
       " 0.020139947533607483,\n",
       " 0.1681257039308548,\n",
       " 0.06954234093427658,\n",
       " 0.04525081068277359,\n",
       " 0.6642180681228638,\n",
       " 0.37605851888656616,\n",
       " 0.08714096248149872,\n",
       " 0.05549418553709984,\n",
       " 0.08315110206604004,\n",
       " 0.036607809364795685,\n",
       " 0.13532887399196625,\n",
       " 0.05552152171730995,\n",
       " 0.15510864555835724,\n",
       " 0.03938803821802139,\n",
       " 0.17317302525043488,\n",
       " 0.04052438214421272,\n",
       " 0.04465610533952713,\n",
       " 0.2384616583585739,\n",
       " 0.05964722856879234,\n",
       " 0.21297132968902588,\n",
       " 0.0394112691283226,\n",
       " 0.03628295660018921,\n",
       " 0.05524817854166031,\n",
       " 0.06795297563076019,\n",
       " 0.027968518435955048,\n",
       " 0.09682062268257141,\n",
       " 0.0484076552093029,\n",
       " 0.12839503586292267,\n",
       " 0.019582655280828476,\n",
       " 0.09756653010845184,\n",
       " 0.08792352676391602,\n",
       " 0.046953123062849045,\n",
       " 0.08002877980470657,\n",
       " 0.03534771874547005,\n",
       " 0.03409164026379585,\n",
       " 0.06404507905244827,\n",
       " 0.15466471016407013,\n",
       " 0.05313846096396446,\n",
       " 0.06594367325305939,\n",
       " 0.09293480962514877,\n",
       " 0.02737482264637947,\n",
       " 0.04459265619516373,\n",
       " 0.02688700333237648,\n",
       " 0.03877761587500572,\n",
       " 0.030191695317626,\n",
       " 0.07304501533508301,\n",
       " 0.06893777847290039,\n",
       " 0.07463103532791138,\n",
       " 0.22722698748111725,\n",
       " 0.058241937309503555,\n",
       " 0.05778706818819046,\n",
       " ...]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## Attention weights within sentences for words that occur more than 100 times; do the sentences need to be of equal length?\n",
    "word_attention_li['the']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): `meam_std_list` is defined outside this cell; the name looks like a typo for\n",
    "# `mean_std_list`, but a rename must change the definition and every call site together.\n",
    "# Each element of the result is unpacked as a 3-tuple (word, amean, astd) by the following cell;\n",
    "# presumably the list is sorted by std in descending order - confirm against the definition.\n",
    "word_mean_std_li = meam_std_list(word_attention_li)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "30个标准差最大的单词: \n",
      "awful | std：0.2445\n",
      "stupid | std：0.2341\n",
      "tedious | std：0.2339\n",
      "terrific | std：0.2335\n",
      "watchable | std：0.2332\n",
      "excellent | std：0.225\n",
      "painful | std：0.2246\n",
      "brilliant | std：0.2244\n",
      "impressive | std：0.2235\n",
      "appealing | std：0.2233\n",
      "inventive | std：0.2226\n",
      "waste | std：0.2218\n",
      "beautifully | std：0.2206\n",
      "flat | std：0.218\n",
      "bland | std：0.2173\n",
      "worthy | std：0.2161\n",
      "remarkable | std：0.2155\n",
      "provocative | std：0.2146\n",
      "intriguing | std：0.2144\n",
      "cool | std：0.2143\n",
      "fine | std：0.2141\n",
      "boring | std：0.2137\n",
      "unfunny | std：0.2135\n",
      "mess | std：0.2135\n",
      "hackneyed | std：0.2115\n",
      "engrossing | std：0.2104\n",
      "gorgeous | std：0.2103\n",
      "lacking | std：0.2083\n",
      "delightful | std：0.2069\n",
      "stylish | std：0.2067\n"
     ]
    }
   ],
   "source": [
    "print('30个标准差最大的单词: ')\n",
    "for word, amean, astd in word_mean_std_li[:30]:\n",
    "    print('{} | std：{:.4}'.format(word, astd))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'inane': 0.4294554591178894,\n",
       "  'and': 0.1071346178650856,\n",
       "  'awful': 0.4634098410606384},\n",
       " {'a': 0.14175325632095337,\n",
       "  'thoroughly': 0.14105528593063354,\n",
       "  'awful': 0.580115556716919,\n",
       "  'movie': 0.13707591593265533},\n",
       " {'is': 0.13191638886928558,\n",
       "  'awful': 0.6552016139030457,\n",
       "  '.': 0.21288198232650757},\n",
       " {'this': 0.034046296030282974,\n",
       "  'wretchedly': 0.16158755123615265,\n",
       "  'unfunny': 0.1627073585987091,\n",
       "  'wannabe': 0.16242194175720215,\n",
       "  'comedy': 0.05273708328604698,\n",
       "  'is': 0.03338056057691574,\n",
       "  'inane': 0.1536465287208557,\n",
       "  'and': 0.03832961246371269,\n",
       "  'awful': 0.16579440236091614,\n",
       "  '-': 0.035348568111658096},\n",
       " {'if': 0.06239921972155571,\n",
       "  'oscar': 0.0761847272515297,\n",
       "  'had': 0.02653614804148674,\n",
       "  'a': 0.022323040291666985,\n",
       "  'category': 0.02682187594473362,\n",
       "  'called': 0.0599784180521965,\n",
       "  'best': 0.06136414036154747,\n",
       "  'bad': 0.09426537156105042,\n",
       "  'film': 0.028234658762812614,\n",
       "  'you': 0.03237655386328697,\n",
       "  'thought': 0.03296743333339691,\n",
       "  'was': 0.053605612367391586,\n",
       "  'going': 0.030031228438019753,\n",
       "  'to': 0.04958777129650116,\n",
       "  'be': 0.022541198879480362,\n",
       "  'really': 0.020105689764022827,\n",
       "  'awful': 0.09135551750659943,\n",
       "  'but': 0.05589110404253006,\n",
       "  \"n't\": 0.09982476383447647},\n",
       " {'the': 0.09079662710428238,\n",
       "  'master': 0.1595860719680786,\n",
       "  'of': 0.13098527491092682,\n",
       "  'disguise': 0.24217694997787476,\n",
       "  'is': 0.049660585820674896,\n",
       "  'awful': 0.2466539442539215,\n",
       "  '.': 0.08014049381017685},\n",
       " {'laughably': 0.13230930268764496,\n",
       "  ',': 0.2168578803539276,\n",
       "  'irredeemably': 0.2219807654619217,\n",
       "  'awful': 0.3236837685108185,\n",
       "  '.': 0.10516829788684845},\n",
       " {'thoroughly': 0.19559204578399658, 'awful': 0.8044079542160034},\n",
       " {'a': 0.019940337166190147,\n",
       "  'thoroughly': 0.019842153415083885,\n",
       "  'awful': 0.0816044807434082,\n",
       "  'movie': 0.019282380118966103,\n",
       "  '--': 0.017907172441482544,\n",
       "  'dumb': 0.08333073556423187,\n",
       "  ',': 0.054672423750162125,\n",
       "  'narratively': 0.021039120852947235,\n",
       "  'chaotic': 0.07769117504358292,\n",
       "  'visually': 0.021352296695113182,\n",
       "  'sloppy': 0.0803229808807373,\n",
       "  '...': 0.0183599554002285,\n",
       "  'weird': 0.06036731228232384,\n",
       "  'amalgam': 0.07513318955898285,\n",
       "  'of': 0.043335966765880585,\n",
       "  '`': 0.04615305736660957,\n",
       "  'the': 0.030039703473448753,\n",
       "  'thing': 0.06902974098920822,\n",
       "  \"'\": 0.021290428936481476,\n",
       "  'and': 0.01886594481766224,\n",
       "  'geriatric': 0.025886304676532745},\n",
       " {'leaves': 0.065589539706707,\n",
       "  'an': 0.08327170461416245,\n",
       "  'awful': 0.35922572016716003,\n",
       "  'sour': 0.35161733627319336,\n",
       "  'taste': 0.14029574394226074},\n",
       " {'explores': 0.20533469319343567,\n",
       "  'the': 0.09154585748910904,\n",
       "  'awful': 0.24868926405906677,\n",
       "  'complications': 0.12292340397834778,\n",
       "  'of': 0.13206613063812256,\n",
       "  'one': 0.054083701223134995,\n",
       "  'terrifying': 0.05196862667798996,\n",
       "  'day': 0.09338836371898651},\n",
       " {'most': 0.13492874801158905,\n",
       "  'awful': 0.7256471514701843,\n",
       "  'acts': 0.139424130320549},\n",
       " {'awful': 0.6692163348197937, 'complications': 0.3307836949825287},\n",
       " {'an': 0.08911683410406113,\n",
       "  'awful': 0.38444098830223083,\n",
       "  'sour': 0.37629854679107666,\n",
       "  'taste': 0.1501435935497284},\n",
       " {'after': 0.09156420081853867,\n",
       "  'the': 0.13955366611480713,\n",
       "  'most': 0.07049179822206497,\n",
       "  'awful': 0.3791050612926483,\n",
       "  'acts': 0.07284035533666611,\n",
       "  'are': 0.07767992466688156,\n",
       "  'committed': 0.16876499354839325},\n",
       " {\"'s\": 0.1126924604177475,\n",
       "  'pauly': 0.2691709101200104,\n",
       "  'shore': 0.28010356426239014,\n",
       "  'awful': 0.3380330502986908},\n",
       " {'awful': 0.8088712692260742, 'movie': 0.19112874567508698},\n",
       " {'so': 0.06524837017059326,\n",
       "  'insanely': 0.05868121609091759,\n",
       "  'stupid': 0.21289615333080292,\n",
       "  ',': 0.14679308235645294,\n",
       "  'awful': 0.21910451352596283,\n",
       "  'in': 0.051732514053583145,\n",
       "  'many': 0.05644390359520912,\n",
       "  'ways': 0.05860355496406555},\n",
       " {\"'s\": 0.032297469675540924,\n",
       "  'truly': 0.022847717627882957,\n",
       "  'awful': 0.09687971323728561,\n",
       "  'and': 0.022397389635443687,\n",
       "  'heartbreaking': 0.07621611654758453,\n",
       "  'subject': 0.06262871623039246,\n",
       "  'matter': 0.02416684292256832,\n",
       "  ',': 0.06490634381771088,\n",
       "  'but': 0.059270795434713364,\n",
       "  'one': 0.021068915724754333,\n",
       "  'whose': 0.024620911106467247,\n",
       "  'lessons': 0.06716205924749374,\n",
       "  'are': 0.01985098421573639,\n",
       "  'well': 0.04671965539455414,\n",
       "  'worth': 0.05730024725198746,\n",
       "  'revisiting': 0.07806351035833359,\n",
       "  'as': 0.06219511479139328,\n",
       "  'many': 0.024957355111837387,\n",
       "  'times': 0.019390465691685677,\n",
       "  'possible': 0.023387199267745018,\n",
       "  '.': 0.031477250158786774},\n",
       " {'merely': 0.11045300215482712,\n",
       "  'bad': 0.23517031967639923,\n",
       "  'rather': 0.1533563882112503,\n",
       "  'than': 0.057572513818740845,\n",
       "  'painfully': 0.21553681790828705,\n",
       "  'awful': 0.22791090607643127},\n",
       " {'bad': 0.20714429020881653,\n",
       "  'film': 0.06204450502991676,\n",
       "  'you': 0.0711461529135704,\n",
       "  'thought': 0.07244458049535751,\n",
       "  'was': 0.1177961453795433,\n",
       "  'going': 0.06599240005016327,\n",
       "  'to': 0.10896710306406021,\n",
       "  'be': 0.049533359706401825,\n",
       "  'really': 0.04418142884969711,\n",
       "  'awful': 0.20075000822544098},\n",
       " {'mind-numbingly': 0.08115215599536896,\n",
       "  'awful': 0.09863696992397308,\n",
       "  'that': 0.032588474452495575,\n",
       "  'you': 0.03495711088180542,\n",
       "  'hope': 0.0504283607006073,\n",
       "  'britney': 0.09704826027154922,\n",
       "  'wo': 0.09770813584327698,\n",
       "  \"n't\": 0.10778125375509262,\n",
       "  'do': 0.07122764736413956,\n",
       "  'it': 0.04121299833059311,\n",
       "  'one': 0.02145107463002205,\n",
       "  'more': 0.021068723872303963,\n",
       "  'time': 0.025203166529536247,\n",
       "  ',': 0.06608365476131439,\n",
       "  'as': 0.0633232444524765,\n",
       "  'far': 0.02680554986000061},\n",
       " {'the': 0.15361973643302917,\n",
       "  'most': 0.07759689539670944,\n",
       "  'awful': 0.4173163175582886,\n",
       "  'acts': 0.08018217235803604,\n",
       "  'are': 0.0855095386505127,\n",
       "  'committed': 0.18577536940574646},\n",
       " {'as': 0.11850421130657196,\n",
       "  'awful': 0.18459093570709229,\n",
       "  'some': 0.04163077473640442,\n",
       "  'of': 0.09802678972482681,\n",
       "  'the': 0.06795040518045425,\n",
       "  'recent': 0.10908572375774384,\n",
       "  'hollywood': 0.045708801597356796,\n",
       "  'trip': 0.03681496903300285,\n",
       "  'tripe': 0.17918314039707184},\n",
       " {',': 0.08382131904363632,\n",
       "  'it': 0.05227508395910263,\n",
       "  'eventually': 0.028522614389657974,\n",
       "  'works': 0.09486135095357895,\n",
       "  'its': 0.026132509112358093,\n",
       "  'way': 0.029977288097143173,\n",
       "  'up': 0.026895593851804733,\n",
       "  'to': 0.06791098415851593,\n",
       "  'merely': 0.060633499175310135,\n",
       "  'bad': 0.12909743189811707,\n",
       "  'rather': 0.08418543636798859,\n",
       "  'than': 0.031604599207639694,\n",
       "  'painfully': 0.1183195635676384,\n",
       "  'awful': 0.1251123547554016,\n",
       "  '.': 0.04065033048391342},\n",
       " {'awful': 0.4220530390739441,\n",
       "  'sour': 0.41311395168304443,\n",
       "  'taste': 0.1648329794406891},\n",
       " {\"'s\": 0.033347148448228836,\n",
       "  'truly': 0.023590276017785072,\n",
       "  'awful': 0.10002832859754562,\n",
       "  'and': 0.023125311359763145,\n",
       "  'heartbreaking': 0.07869316637516022,\n",
       "  'subject': 0.06466417014598846,\n",
       "  'matter': 0.024952273815870285,\n",
       "  ',': 0.06701581925153732,\n",
       "  'but': 0.06119711324572563,\n",
       "  'one': 0.021753663197159767,\n",
       "  'whose': 0.02542109787464142,\n",
       "  'lessons': 0.06934484839439392,\n",
       "  'are': 0.020496148616075516,\n",
       "  'well': 0.048238057643175125,\n",
       "  'worth': 0.05916252359747887,\n",
       "  'revisiting': 0.08060060441493988,\n",
       "  'as': 0.06421647220849991,\n",
       "  'many': 0.025768477469682693,\n",
       "  'times': 0.020020661875605583,\n",
       "  'possible': 0.024147290736436844},\n",
       " {'completely': 0.049199871718883514,\n",
       "  'awful': 0.1276017129421234,\n",
       "  'iranian': 0.03834697976708412,\n",
       "  'drama': 0.030272113159298897,\n",
       "  '...': 0.02870873734354973,\n",
       "  'as': 0.08191811293363571,\n",
       "  'much': 0.02512318640947342,\n",
       "  'fun': 0.10083401948213577,\n",
       "  'a': 0.031179923564195633,\n",
       "  'grouchy': 0.09717969596385956,\n",
       "  'ayatollah': 0.023847129195928574,\n",
       "  'in': 0.03012789599597454,\n",
       "  'cold': 0.12421756237745285,\n",
       "  'mosque': 0.09834505617618561},\n",
       " {'is': 0.02889944054186344,\n",
       "  'so': 0.042744871228933334,\n",
       "  'insanely': 0.038442663848400116,\n",
       "  'stupid': 0.1394704282283783,\n",
       "  ',': 0.09616564959287643,\n",
       "  'awful': 0.14353759586811066,\n",
       "  'in': 0.0338904969394207,\n",
       "  'many': 0.036976974457502365,\n",
       "  'ways': 0.03839178755879402,\n",
       "  'that': 0.047423105686903,\n",
       "  'watching': 0.03678854554891586,\n",
       "  'it': 0.05997360497713089,\n",
       "  'leaves': 0.02620793879032135,\n",
       "  'you': 0.05086996778845787,\n",
       "  'giddy': 0.09472718089818954},\n",
       " {'it': 0.09412791579961777,\n",
       "  'explores': 0.18600699305534363,\n",
       "  'the': 0.0829288437962532,\n",
       "  'awful': 0.22528067231178284,\n",
       "  'complications': 0.11135289072990417,\n",
       "  'of': 0.11963502317667007,\n",
       "  'one': 0.04899291694164276,\n",
       "  'terrifying': 0.047076933085918427,\n",
       "  'day': 0.08459792286157608},\n",
       " {'what': 0.01864301599562168,\n",
       "  'one': 0.016994623467326164,\n",
       "  'is': 0.015733523294329643,\n",
       "  'left': 0.060892459005117416,\n",
       "  'with': 0.023125046864151955,\n",
       "  ',': 0.05235479772090912,\n",
       "  'even': 0.022429782897233963,\n",
       "  'after': 0.018874188885092735,\n",
       "  'the': 0.028766289353370667,\n",
       "  'most': 0.014530520886182785,\n",
       "  'awful': 0.07814517617225647,\n",
       "  'acts': 0.015014630742371082,\n",
       "  'are': 0.01601221412420273,\n",
       "  'committed': 0.03478763997554779,\n",
       "  'an': 0.018114745616912842,\n",
       "  'overwhelming': 0.020814383402466774,\n",
       "  'sadness': 0.02933056280016899,\n",
       "  'that': 0.02581823244690895,\n",
       "  'feels': 0.07203766703605652,\n",
       "  'as': 0.05016786605119705,\n",
       "  'if': 0.05337607115507126,\n",
       "  'it': 0.032651014626026154,\n",
       "  'has': 0.02361379750072956,\n",
       "  'made': 0.016102956607937813,\n",
       "  'its': 0.016322365030646324,\n",
       "  'way': 0.01872381381690502,\n",
       "  'into': 0.021546926349401474,\n",
       "  'your': 0.014629960991442204,\n",
       "  'very': 0.01732904091477394,\n",
       "  'bloodstream': 0.05963826924562454,\n",
       "  '.': 0.025390200316905975},\n",
       " {'awful': 0.4288741946220398,\n",
       "  'snooze': 0.43178004026412964,\n",
       "  '.': 0.13934578001499176},\n",
       " {'mind-numbingly': 0.07909561693668365,\n",
       "  'awful': 0.09613732993602753,\n",
       "  'that': 0.03176262229681015,\n",
       "  'you': 0.03407123684883118,\n",
       "  'hope': 0.049150414764881134,\n",
       "  'britney': 0.09458888322114944,\n",
       "  'wo': 0.0952320396900177,\n",
       "  \"n't\": 0.10504988580942154,\n",
       "  'do': 0.06942261755466461,\n",
       "  'it': 0.04016858711838722,\n",
       "  'one': 0.020907465368509293,\n",
       "  'more': 0.020534805953502655,\n",
       "  'time': 0.02456447295844555,\n",
       "  ',': 0.06440897285938263,\n",
       "  'as': 0.061718519777059555,\n",
       "  'far': 0.026126248762011528,\n",
       "  'movies': 0.02534175105392933},\n",
       " {'thoroughly': 0.15506432950496674,\n",
       "  'awful': 0.6377303004264832,\n",
       "  '.': 0.20720535516738892},\n",
       " {'you': 0.09735230356454849,\n",
       "  'thought': 0.09912901371717453,\n",
       "  'was': 0.16118547320365906,\n",
       "  'going': 0.09030020236968994,\n",
       "  'to': 0.14910432696342468,\n",
       "  'be': 0.06777860969305038,\n",
       "  'really': 0.060455333441495895,\n",
       "  'awful': 0.2746948003768921},\n",
       " {',': 0.060552988201379776,\n",
       "  'even': 0.025942042469978333,\n",
       "  'after': 0.02182968147099018,\n",
       "  'the': 0.03327077627182007,\n",
       "  'most': 0.01680584065616131,\n",
       "  'awful': 0.09038186073303223,\n",
       "  'acts': 0.01736575737595558,\n",
       "  'are': 0.018519552424550056,\n",
       "  'committed': 0.0402350015938282,\n",
       "  'is': 0.01819721981883049,\n",
       "  'an': 0.020951317623257637,\n",
       "  'overwhelming': 0.024073688313364983,\n",
       "  'sadness': 0.03392340987920761,\n",
       "  'that': 0.02986108511686325,\n",
       "  'feels': 0.08331798017024994,\n",
       "  'as': 0.0580236054956913,\n",
       "  'if': 0.06173418089747429,\n",
       "  'it': 0.0377638079226017,\n",
       "  'has': 0.02731146104633808,\n",
       "  'made': 0.018624503165483475,\n",
       "  'its': 0.01887826807796955,\n",
       "  'way': 0.02165575884282589,\n",
       "  'into': 0.024920940399169922,\n",
       "  'your': 0.016920853406190872,\n",
       "  'very': 0.020042579621076584,\n",
       "  'bloodstream': 0.0689769759774208,\n",
       "  '.': 0.02936602756381035},\n",
       " {'a': 0.01755085587501526,\n",
       "  'thoroughly': 0.01746443659067154,\n",
       "  'awful': 0.07182569056749344,\n",
       "  'movie': 0.01697174273431301,\n",
       "  '--': 0.015761328861117363,\n",
       "  'dumb': 0.07334508001804352,\n",
       "  ',': 0.04812094196677208,\n",
       "  'narratively': 0.01851796917617321,\n",
       "  'chaotic': 0.06838131695985794,\n",
       "  'visually': 0.018793616443872452,\n",
       "  'sloppy': 0.07069775462150574,\n",
       "  '...': 0.01615985296666622,\n",
       "  'weird': 0.05313340201973915,\n",
       "  'amalgam': 0.06612985581159592,\n",
       "  'of': 0.03814294934272766,\n",
       "  '`': 0.04062246158719063,\n",
       "  'the': 0.026439998298883438,\n",
       "  'thing': 0.060757800936698914,\n",
       "  \"'\": 0.018739163875579834,\n",
       "  'and': 0.016605209559202194,\n",
       "  'geriatric': 0.022784307599067688,\n",
       "  'scream': 0.03713303059339523,\n",
       "  '.': 0.023336926475167274},\n",
       " {'comes': 0.03924285247921944,\n",
       "  'along': 0.024345073848962784,\n",
       "  'that': 0.040589697659015656,\n",
       "  'is': 0.024735191836953163,\n",
       "  'so': 0.036585573107004166,\n",
       "  'insanely': 0.032903287559747696,\n",
       "  'stupid': 0.11937351524829865,\n",
       "  ',': 0.08230870962142944,\n",
       "  'awful': 0.12285462021827698,\n",
       "  'in': 0.029007064178586006,\n",
       "  'many': 0.031648799777030945,\n",
       "  'ways': 0.032859742641448975,\n",
       "  'watching': 0.03148752078413963,\n",
       "  'it': 0.051331743597984314,\n",
       "  'leaves': 0.022431518882513046,\n",
       "  'you': 0.04353988915681839,\n",
       "  'giddy': 0.08107752352952957,\n",
       "  '.': 0.03991677239537239},\n",
       " {'that': 0.0452759750187397,\n",
       "  'is': 0.02759099006652832,\n",
       "  'so': 0.04080955684185028,\n",
       "  'insanely': 0.03670213371515274,\n",
       "  'stupid': 0.13315577805042267,\n",
       "  ',': 0.0918116569519043,\n",
       "  'awful': 0.13703878223896027,\n",
       "  'in': 0.03235607221722603,\n",
       "  'many': 0.03530280664563179,\n",
       "  'ways': 0.036653559654951096,\n",
       "  'watching': 0.035122908651828766,\n",
       "  'it': 0.057258240878582,\n",
       "  'leaves': 0.025021348148584366,\n",
       "  'you': 0.0485667809844017,\n",
       "  'giddy': 0.09043832123279572},\n",
       " {'the': 0.21080905199050903,\n",
       "  'most': 0.1064845472574234,\n",
       "  'awful': 0.5726741552352905,\n",
       "  'acts': 0.11003226786851883},\n",
       " {'irredeemably': 0.34107184410095215,\n",
       "  'awful': 0.4973377585411072,\n",
       "  '.': 0.1615903377532959},\n",
       " {'to': 0.1100907251238823,\n",
       "  'merely': 0.09829316288232803,\n",
       "  'bad': 0.20928026735782623,\n",
       "  'rather': 0.13647329807281494,\n",
       "  'than': 0.051234323531389236,\n",
       "  'painfully': 0.19180823862552643,\n",
       "  'awful': 0.2028200477361679},\n",
       " {'every': 0.024657193571329117,\n",
       "  'so': 0.03111940436065197,\n",
       "  'often': 0.035799600183963776,\n",
       "  'a': 0.02553473971784115,\n",
       "  'film': 0.032296888530254364,\n",
       "  'comes': 0.03337966278195381,\n",
       "  'along': 0.020707732066512108,\n",
       "  'that': 0.03452528268098831,\n",
       "  'is': 0.021039562299847603,\n",
       "  'insanely': 0.02798728086054325,\n",
       "  'stupid': 0.10153818875551224,\n",
       "  ',': 0.07001115381717682,\n",
       "  'awful': 0.10449919104576111,\n",
       "  'in': 0.024673184379935265,\n",
       "  'many': 0.026920221745967865,\n",
       "  'ways': 0.027950242161750793,\n",
       "  'watching': 0.026783039793372154,\n",
       "  'it': 0.043662380427122116,\n",
       "  'leaves': 0.01908007636666298,\n",
       "  'you': 0.037034690380096436,\n",
       "  'giddy': 0.0689639076590538,\n",
       "  '.': 0.033952899277210236},\n",
       " {'of': 0.054488372057676315,\n",
       "  'advocacy': 0.05052750185132027,\n",
       "  'cinema': 0.03530295938253403,\n",
       "  'that': 0.03389953449368477,\n",
       "  'carries': 0.05737563967704773,\n",
       "  'you': 0.03636346012353897,\n",
       "  'along': 0.020332418382167816,\n",
       "  'in': 0.024225998669862747,\n",
       "  'a': 0.025071939453482628,\n",
       "  'torrent': 0.05437120422720909,\n",
       "  'emotion': 0.03222670778632164,\n",
       "  'as': 0.06587078422307968,\n",
       "  'it': 0.04287102818489075,\n",
       "  'explores': 0.08471781015396118,\n",
       "  'the': 0.037770356982946396,\n",
       "  'awful': 0.10260520875453949,\n",
       "  'complications': 0.050716228783130646,\n",
       "  'one': 0.02231406979262829,\n",
       "  'terrifying': 0.021441424265503883,\n",
       "  'day': 0.03853054717183113},\n",
       " {'irredeemably': 0.40680813789367676, 'awful': 0.5931918621063232},\n",
       " {'that': 0.03943268209695816,\n",
       "  'carries': 0.06674060225486755,\n",
       "  'you': 0.04229877516627312,\n",
       "  'along': 0.023651113733649254,\n",
       "  'in': 0.028180213645100594,\n",
       "  'a': 0.029164230450987816,\n",
       "  'torrent': 0.06324578076601028,\n",
       "  'of': 0.06338206678628922,\n",
       "  'emotion': 0.037486810237169266,\n",
       "  'as': 0.07662234455347061,\n",
       "  'it': 0.049868520349264145,\n",
       "  'explores': 0.0985456183552742,\n",
       "  'the': 0.04393530637025833,\n",
       "  'awful': 0.11935263127088547,\n",
       "  'complications': 0.05899422988295555,\n",
       "  'one': 0.02595621533691883,\n",
       "  'terrifying': 0.024941135197877884,\n",
       "  'day': 0.04481957480311394},\n",
       " {'be': 0.16821488738059998,\n",
       "  'really': 0.15003976225852966,\n",
       "  'awful': 0.6817453503608704},\n",
       " {'this': 0.01767290197312832,\n",
       "  'wretchedly': 0.08387758582830429,\n",
       "  'unfunny': 0.0844588503241539,\n",
       "  'wannabe': 0.08431070297956467,\n",
       "  'comedy': 0.02737499587237835,\n",
       "  'is': 0.017327329143881798,\n",
       "  'inane': 0.0797555148601532,\n",
       "  'and': 0.019896306097507477,\n",
       "  'awful': 0.08606129139661789,\n",
       "  '-': 0.018348893150687218,\n",
       "  'no': 0.08899621665477753,\n",
       "  'doubt': 0.01768343150615692,\n",
       "  ',': 0.05765834450721741,\n",
       "  'it': 0.03595856577157974,\n",
       "  \"'s\": 0.02869085967540741,\n",
       "  'the': 0.03168031573295593,\n",
       "  'worst': 0.08597561717033386,\n",
       "  'movie': 0.0203354824334383,\n",
       "  'i': 0.01874994859099388,\n",
       "  \"'ve\": 0.030085841193795204,\n",
       "  'seen': 0.0184037946164608,\n",
       "  'summer': 0.029024161398410797},\n",
       " {'an': 0.1288910061120987,\n",
       "  'awful': 0.5560227036476135,\n",
       "  'movie': 0.1313829869031906,\n",
       "  'that': 0.1837032586336136},\n",
       " {'awful': 0.20940649509429932,\n",
       "  'as': 0.13443537056446075,\n",
       "  'some': 0.04722742736339569,\n",
       "  'of': 0.11120506376028061,\n",
       "  'the': 0.07708533853292465,\n",
       "  'recent': 0.123750701546669,\n",
       "  'hollywood': 0.05185368284583092,\n",
       "  'trip': 0.04176420345902443,\n",
       "  'tripe': 0.20327170193195343},\n",
       " {'going': 0.14058153331279755,\n",
       "  'to': 0.23212923109531403,\n",
       "  'be': 0.1055193841457367,\n",
       "  'really': 0.09411832690238953,\n",
       "  'awful': 0.4276514947414398},\n",
       " {'it': 0.057057738304138184,\n",
       "  'eventually': 0.031132152304053307,\n",
       "  'works': 0.10354022681713104,\n",
       "  'its': 0.02852337621152401,\n",
       "  'way': 0.03271991387009621,\n",
       "  'up': 0.029356274753808975,\n",
       "  'to': 0.07412417232990265,\n",
       "  'merely': 0.06618086248636246,\n",
       "  'bad': 0.14090856909751892,\n",
       "  'rather': 0.09188757836818695,\n",
       "  'than': 0.034496109932661057,\n",
       "  'painfully': 0.12914463877677917,\n",
       "  'awful': 0.1365589052438736,\n",
       "  '.': 0.04436943680047989},\n",
       " {'called': 0.10405448824167252,\n",
       "  'best': 0.10645852982997894,\n",
       "  'bad': 0.16353774070739746,\n",
       "  'film': 0.04898333176970482,\n",
       "  'you': 0.05616896599531174,\n",
       "  'thought': 0.0571940615773201,\n",
       "  'was': 0.09299853444099426,\n",
       "  'going': 0.05210014432668686,\n",
       "  'to': 0.08602811396121979,\n",
       "  'be': 0.03910594806075096,\n",
       "  'really': 0.03488066792488098,\n",
       "  'awful': 0.15848952531814575},\n",
       " {'a': 0.019911793991923332,\n",
       "  'compelling': 0.05103621631860733,\n",
       "  ',': 0.05459415540099144,\n",
       "  'gut-clutching': 0.058459702879190445,\n",
       "  'piece': 0.021811790764331818,\n",
       "  'of': 0.0432739220559597,\n",
       "  'advocacy': 0.040128253400325775,\n",
       "  'cinema': 0.02803712897002697,\n",
       "  'that': 0.026922550052404404,\n",
       "  'carries': 0.04556695371866226,\n",
       "  'you': 0.02887936681509018,\n",
       "  'along': 0.016147729009389877,\n",
       "  'in': 0.019239958375692368,\n",
       "  'torrent': 0.04318087175488472,\n",
       "  'emotion': 0.025594012811779976,\n",
       "  'as': 0.052313681691884995,\n",
       "  'it': 0.03404758870601654,\n",
       "  'explores': 0.06728173792362213,\n",
       "  'the': 0.029996702447533607,\n",
       "  'awful': 0.08148766309022903,\n",
       "  'complications': 0.04027814045548439,\n",
       "  'one': 0.01772153005003929,\n",
       "  'terrifying': 0.017028488218784332,\n",
       "  'day': 0.03060043603181839},\n",
       " {'pauly': 0.30335694551467896,\n",
       "  'shore': 0.31567811965942383,\n",
       "  'awful': 0.3809649348258972},\n",
       " {'is': 0.16759416460990906, 'awful': 0.8324058055877686},\n",
       " {'called': 0.09485683590173721,\n",
       "  'best': 0.09704837203025818,\n",
       "  'bad': 0.14908219873905182,\n",
       "  'film': 0.04465356469154358,\n",
       "  'you': 0.05120404437184334,\n",
       "  'thought': 0.05213852599263191,\n",
       "  'was': 0.08477814495563507,\n",
       "  'going': 0.04749487340450287,\n",
       "  'to': 0.0784238651394844,\n",
       "  'be': 0.035649266093969345,\n",
       "  'really': 0.03179747238755226,\n",
       "  'awful': 0.14448021352291107,\n",
       "  'but': 0.08839268237352371},\n",
       " {',': 0.24992533028125763,\n",
       "  'irredeemably': 0.255829393863678,\n",
       "  'awful': 0.37304049730300903,\n",
       "  '.': 0.12120483070611954},\n",
       " {'is': 0.08533930033445358,\n",
       "  'inane': 0.3928060829639435,\n",
       "  'and': 0.09799183160066605,\n",
       "  'awful': 0.4238628149032593},\n",
       " {'the': 0.08053579181432724,\n",
       "  'sweetest': 0.05428140237927437,\n",
       "  'thing': 0.18506722152233124,\n",
       "  'leaves': 0.03994610533118248,\n",
       "  'an': 0.05071510374546051,\n",
       "  'awful': 0.2187798172235489,\n",
       "  'sour': 0.21414606273174286,\n",
       "  'taste': 0.08544454723596573,\n",
       "  '.': 0.07108388841152191},\n",
       " {'this': 0.035293884575366974,\n",
       "  'wretchedly': 0.16750875115394592,\n",
       "  'unfunny': 0.16866959631443024,\n",
       "  'wannabe': 0.1683737188577652,\n",
       "  'comedy': 0.0546695739030838,\n",
       "  'is': 0.03460375592112541,\n",
       "  'inane': 0.15927672386169434,\n",
       "  'and': 0.03973415866494179,\n",
       "  'awful': 0.17186975479125977},\n",
       " {'eventually': 0.03464622050523758,\n",
       "  'works': 0.11522741615772247,\n",
       "  'its': 0.03174297511577606,\n",
       "  'way': 0.036413200199604034,\n",
       "  'up': 0.03266988694667816,\n",
       "  'to': 0.08249099552631378,\n",
       "  'merely': 0.07365108281373978,\n",
       "  'bad': 0.15681374073028564,\n",
       "  'rather': 0.10225945711135864,\n",
       "  'than': 0.03838988393545151,\n",
       "  'painfully': 0.14372193813323975,\n",
       "  'awful': 0.15197309851646423},\n",
       " {'an': 0.1578972339630127,\n",
       "  'awful': 0.6811527013778687,\n",
       "  'movie': 0.16095004975795746},\n",
       " {'mind-numbingly': 0.07395108044147491,\n",
       "  'awful': 0.08988437056541443,\n",
       "  'that': 0.029696719720959663,\n",
       "  'you': 0.031855177134275436,\n",
       "  'hope': 0.045953575521707535,\n",
       "  'britney': 0.08843664079904556,\n",
       "  'wo': 0.0890379548072815,\n",
       "  \"n't\": 0.09821723401546478,\n",
       "  'do': 0.06490723043680191,\n",
       "  'it': 0.03755594417452812,\n",
       "  'one': 0.0195476021617651,\n",
       "  'more': 0.019199181348085403,\n",
       "  'time': 0.02296675182878971,\n",
       "  ',': 0.06021968647837639,\n",
       "  'as': 0.057704225182533264,\n",
       "  'far': 0.02442694641649723,\n",
       "  'movies': 0.023693474009633064,\n",
       "  'are': 0.018417613580822945,\n",
       "  'concerned': 0.01741994358599186,\n",
       "  '.': 0.029204385355114937},\n",
       " {'was': 0.2005995213985443,\n",
       "  'going': 0.11238095164299011,\n",
       "  'to': 0.18556423485279083,\n",
       "  'be': 0.08435225486755371,\n",
       "  'really': 0.07523823529481888,\n",
       "  'awful': 0.34186482429504395},\n",
       " {'this': 0.017192170023918152,\n",
       "  'wretchedly': 0.08159597963094711,\n",
       "  'unfunny': 0.08216143399477005,\n",
       "  'wannabe': 0.08201731741428375,\n",
       "  'comedy': 0.026630351319909096,\n",
       "  'is': 0.0168559979647398,\n",
       "  'inane': 0.07758603990077972,\n",
       "  'and': 0.01935509406030178,\n",
       "  'awful': 0.08372028917074203,\n",
       "  '-': 0.017849775031208992,\n",
       "  'no': 0.08657537400722504,\n",
       "  'doubt': 0.017202414572238922,\n",
       "  ',': 0.05608994513750076,\n",
       "  'it': 0.03498043492436409,\n",
       "  \"'s\": 0.027910422533750534,\n",
       "  'the': 0.03081856109201908,\n",
       "  'worst': 0.08363693952560425,\n",
       "  'movie': 0.019782325252890587,\n",
       "  'i': 0.01823991909623146,\n",
       "  \"'ve\": 0.029267458245158195,\n",
       "  'seen': 0.01790318265557289,\n",
       "  'summer': 0.028234656900167465,\n",
       "  '.': 0.027201607823371887},\n",
       " {'an': 0.03571278601884842,\n",
       "  'awful': 0.15406134724617004,\n",
       "  'movie': 0.036403264850378036,\n",
       "  'that': 0.05090002715587616,\n",
       "  'will': 0.07559400051832199,\n",
       "  'only': 0.15750566124916077,\n",
       "  'satisfy': 0.04559914395213127,\n",
       "  'the': 0.056712038815021515,\n",
       "  'most': 0.028646567836403847,\n",
       "  'emotionally': 0.02979300171136856,\n",
       "  'malleable': 0.1515154391527176,\n",
       "  'of': 0.08181409537792206,\n",
       "  'filmgoers': 0.04568655416369438,\n",
       "  '.': 0.050056155771017075},\n",
       " {'is': 0.042250048369169235,\n",
       "  'so': 0.06249161809682846,\n",
       "  'insanely': 0.05620192736387253,\n",
       "  'stupid': 0.2039012610912323,\n",
       "  ',': 0.1405910700559616,\n",
       "  'awful': 0.2098473161458969,\n",
       "  'in': 0.04954681172966957,\n",
       "  'many': 0.054059140384197235,\n",
       "  'ways': 0.056127551943063736},\n",
       " {\"'s\": 0.04838254675269127,\n",
       "  'not': 0.16540004312992096,\n",
       "  'as': 0.09317008405923843,\n",
       "  'awful': 0.14512860774993896,\n",
       "  'some': 0.032730840146541595,\n",
       "  'of': 0.07707036286592484,\n",
       "  'the': 0.05342378839850426,\n",
       "  'recent': 0.08576508611440659,\n",
       "  'hollywood': 0.03593705594539642,\n",
       "  'trip': 0.02894457057118416,\n",
       "  'tripe': 0.1408768892288208},\n",
       " {'as': 0.1263524293899536,\n",
       "  'it': 0.08223462104797363,\n",
       "  'explores': 0.16250453889369965,\n",
       "  'the': 0.07245057821273804,\n",
       "  'awful': 0.19681590795516968,\n",
       "  'complications': 0.09728317707777023,\n",
       "  'of': 0.1045188456773758,\n",
       "  'one': 0.04280254244804382,\n",
       "  'terrifying': 0.041128646582365036,\n",
       "  'day': 0.07390876114368439},\n",
       " {'a': 0.12574584782123566,\n",
       "  'thoroughly': 0.1251266896724701,\n",
       "  'awful': 0.5146062970161438,\n",
       "  'movie': 0.12159668654203415,\n",
       "  '--': 0.1129244789481163},\n",
       " {'an': 0.09042688459157944,\n",
       "  'awful': 0.3900924324989319,\n",
       "  'snooze': 0.3927355408668518,\n",
       "  '.': 0.12674517929553986},\n",
       " {'so': 0.229462668299675, 'awful': 0.7705373167991638},\n",
       " {'completely': 0.27827730774879456, 'awful': 0.7217226624488831},\n",
       " {'completely': 0.1794768124818802,\n",
       "  'awful': 0.46547985076904297,\n",
       "  'iranian': 0.13988640904426575,\n",
       "  'drama': 0.11043000221252441,\n",
       "  '...': 0.10472694784402847},\n",
       " {'thoroughly': 0.16435283422470093,\n",
       "  'awful': 0.6759309768676758,\n",
       "  'movie': 0.1597161889076233},\n",
       " {'even': 0.027614161372184753,\n",
       "  'after': 0.023236732929944992,\n",
       "  'the': 0.035415273159742355,\n",
       "  'most': 0.017889076843857765,\n",
       "  'awful': 0.09620750695466995,\n",
       "  'acts': 0.018485084176063538,\n",
       "  'are': 0.019713247194886208,\n",
       "  'committed': 0.04282838851213455,\n",
       "  ',': 0.06445598602294922,\n",
       "  'is': 0.01937013864517212,\n",
       "  'an': 0.022301753982901573,\n",
       "  'overwhelming': 0.025625379756093025,\n",
       "  'sadness': 0.03610997274518013,\n",
       "  'that': 0.03178580850362778,\n",
       "  'feels': 0.08868832141160965,\n",
       "  'as': 0.061763569712638855,\n",
       "  'if': 0.0657133162021637,\n",
       "  'it': 0.040197908878326416,\n",
       "  'has': 0.02907184511423111,\n",
       "  'made': 0.019824963063001633,\n",
       "  'its': 0.020095085725188255,\n",
       "  'way': 0.02305159904062748,\n",
       "  'into': 0.026527242735028267,\n",
       "  'your': 0.01801150292158127,\n",
       "  'very': 0.021334441378712654,\n",
       "  'bloodstream': 0.07342294603586197,\n",
       "  '.': 0.03125884383916855},\n",
       " {'to': 0.27010035514831543,\n",
       "  'be': 0.122779980301857,\n",
       "  'really': 0.10951396077871323,\n",
       "  'awful': 0.49760565161705017},\n",
       " {'completely': 0.04724128916859627,\n",
       "  'awful': 0.12252205610275269,\n",
       "  'iranian': 0.0368204340338707,\n",
       "  'drama': 0.029067019000649452,\n",
       "  '...': 0.027565879747271538,\n",
       "  'as': 0.07865706086158752,\n",
       "  'much': 0.024123065173625946,\n",
       "  'fun': 0.09681994467973709,\n",
       "  'a': 0.02993869036436081,\n",
       "  'grouchy': 0.09331109374761581,\n",
       "  'ayatollah': 0.022897804155945778,\n",
       "  'in': 0.028928542509675026,\n",
       "  'cold': 0.1192726194858551,\n",
       "  'mosque': 0.09443005919456482,\n",
       "  '.': 0.0398087203502655},\n",
       " {'advocacy': 0.05343932285904884,\n",
       "  'cinema': 0.03733741119503975,\n",
       "  'that': 0.03585311025381088,\n",
       "  'carries': 0.060682110488414764,\n",
       "  'you': 0.03845903277397156,\n",
       "  'along': 0.021504143252968788,\n",
       "  'in': 0.025622105225920677,\n",
       "  'a': 0.02651679702103138,\n",
       "  'torrent': 0.05750453472137451,\n",
       "  'of': 0.05762845277786255,\n",
       "  'emotion': 0.034083880484104156,\n",
       "  'as': 0.06966681778430939,\n",
       "  'it': 0.045341622084379196,\n",
       "  'explores': 0.08959996700286865,\n",
       "  'the': 0.03994700312614441,\n",
       "  'awful': 0.10851819068193436,\n",
       "  'complications': 0.05363892763853073,\n",
       "  'one': 0.02359999530017376,\n",
       "  'terrifying': 0.022677060216665268,\n",
       "  'day': 0.040751002728939056},\n",
       " {'it': 0.11289438605308533,\n",
       "  \"'s\": 0.09007691591978073,\n",
       "  'pauly': 0.21515269577503204,\n",
       "  'shore': 0.22389134764671326,\n",
       "  'awful': 0.270195335149765,\n",
       "  '.': 0.08778934180736542},\n",
       " {'eventually': 0.03301596641540527,\n",
       "  'works': 0.10980547964572906,\n",
       "  'its': 0.03024933487176895,\n",
       "  'way': 0.034699805080890656,\n",
       "  'up': 0.031132632866501808,\n",
       "  'to': 0.07860944420099258,\n",
       "  'merely': 0.07018548995256424,\n",
       "  'bad': 0.14943499863147736,\n",
       "  'rather': 0.0974477231502533,\n",
       "  'than': 0.03658347949385643,\n",
       "  'painfully': 0.13695921003818512,\n",
       "  'awful': 0.1448221206665039,\n",
       "  '.': 0.047054242342710495},\n",
       " {'really': 0.1803828477859497, 'awful': 0.8196171522140503},\n",
       " {'so': 0.02606937475502491,\n",
       "  'mind-numbingly': 0.07202322781085968,\n",
       "  'awful': 0.08754114806652069,\n",
       "  'that': 0.02892254665493965,\n",
       "  'you': 0.031024733558297157,\n",
       "  'hope': 0.04475559666752815,\n",
       "  'britney': 0.08613115549087524,\n",
       "  'wo': 0.08671680092811584,\n",
       "  \"n't\": 0.09565677493810654,\n",
       "  'do': 0.06321514397859573,\n",
       "  'it': 0.03657688573002815,\n",
       "  'one': 0.019038010388612747,\n",
       "  'more': 0.018698671832680702,\n",
       "  'time': 0.02236802503466606,\n",
       "  ',': 0.058649804443120956,\n",
       "  'as': 0.05619991570711136,\n",
       "  'far': 0.023790152743458748,\n",
       "  'movies': 0.023075800389051437,\n",
       "  'are': 0.01793747954070568,\n",
       "  'concerned': 0.016965817660093307,\n",
       "  '.': 0.028443047776818275},\n",
       " {'an': 0.012256614863872528,\n",
       "  'awful': 0.05287380516529083,\n",
       "  'lot': 0.014167326502501965,\n",
       "  'like': 0.016992343589663506,\n",
       "  'one': 0.01149873249232769,\n",
       "  'of': 0.02807857096195221,\n",
       "  '-lrb-': 0.010325021110475063,\n",
       "  'spears': 0.0493493527173996,\n",
       "  \"'\": 0.01379465777426958,\n",
       "  '-rrb-': 0.010896394960582256,\n",
       "  'music': 0.01663294993340969,\n",
       "  'videos': 0.01268409937620163,\n",
       "  'in': 0.012483972124755383,\n",
       "  'content': 0.01668781228363514,\n",
       "  '--': 0.011602552607655525,\n",
       "  'except': 0.05367470160126686,\n",
       "  'that': 0.017468871548771858,\n",
       "  'it': 0.022092001512646675,\n",
       "  'goes': 0.010555661283433437,\n",
       "  'on': 0.01727188564836979,\n",
       "  'for': 0.014979766681790352,\n",
       "  'at': 0.01128324680030346,\n",
       "  'least': 0.01294358167797327,\n",
       "  '90': 0.044871747493743896,\n",
       "  'more': 0.011293776333332062,\n",
       "  'minutes': 0.06276290863752365,\n",
       "  'and': 0.012223768047988415,\n",
       "  ',': 0.035423774272203445,\n",
       "  'worse': 0.049986835569143295,\n",
       "  'you': 0.01873856596648693,\n",
       "  'have': 0.011632970534265041,\n",
       "  'to': 0.028699900954961777,\n",
       "  'pay': 0.052366018295288086,\n",
       "  'if': 0.036114782094955444,\n",
       "  'want': 0.010469161905348301,\n",
       "  'see': 0.035219598561525345,\n",
       "  '.': 0.017179260030388832},\n",
       " {'works': 0.11936289072036743,\n",
       "  'its': 0.032882221043109894,\n",
       "  'way': 0.03772005811333656,\n",
       "  'up': 0.03384239971637726,\n",
       "  'to': 0.08545156568288803,\n",
       "  'merely': 0.07629439234733582,\n",
       "  'bad': 0.16244173049926758,\n",
       "  'rather': 0.10592951625585556,\n",
       "  'than': 0.039767686277627945,\n",
       "  'painfully': 0.14888006448745728,\n",
       "  'awful': 0.1574273556470871},\n",
       " {'so': 0.08496195077896118,\n",
       "  'insanely': 0.07641065120697021,\n",
       "  'stupid': 0.277218759059906,\n",
       "  ',': 0.19114388525485992,\n",
       "  'awful': 0.2853028476238251},\n",
       " {'painfully': 0.4860478639602661, 'awful': 0.5139521360397339},\n",
       " {'thoroughly': 0.19559204578399658, 'awful': 0.8044079542160034},\n",
       " {'the': 0.1976555585861206,\n",
       "  'awful': 0.5369420051574707,\n",
       "  'complications': 0.2654024660587311},\n",
       " {'it': 0.04119160398840904,\n",
       "  \"'s\": 0.032866232097148895,\n",
       "  'not': 0.11235613375902176,\n",
       "  'as': 0.06329037249088287,\n",
       "  'awful': 0.09858576208353043,\n",
       "  'some': 0.022234037518501282,\n",
       "  'of': 0.05235384777188301,\n",
       "  'the': 0.036290742456912994,\n",
       "  'recent': 0.05826016142964363,\n",
       "  'hollywood': 0.024412015452980995,\n",
       "  'trip': 0.019662026315927505,\n",
       "  'tripe': 0.09569757431745529,\n",
       "  '...': 0.022180521860718727,\n",
       "  'but': 0.06031455099582672,\n",
       "  'far': 0.026791632175445557,\n",
       "  'from': 0.023752855136990547,\n",
       "  'a': 0.024089770391583443,\n",
       "  'groundbreaking': 0.022507349029183388,\n",
       "  'endeavor': 0.025814786553382874},\n",
       " {'a': 0.026738017797470093,\n",
       "  'category': 0.032126620411872864,\n",
       "  'called': 0.07184075564146042,\n",
       "  'best': 0.07350054383277893,\n",
       "  'bad': 0.11290886998176575,\n",
       "  'film': 0.033818818628787994,\n",
       "  'you': 0.038779884576797485,\n",
       "  'thought': 0.039487626403570175,\n",
       "  'was': 0.06420756131410599,\n",
       "  'going': 0.03597070649266243,\n",
       "  'to': 0.05939508229494095,\n",
       "  'be': 0.026999324560165405,\n",
       "  'really': 0.024082129821181297,\n",
       "  'awful': 0.10942351818084717,\n",
       "  'but': 0.06694506853818893,\n",
       "  \"n't\": 0.11956778913736343},\n",
       " {'truly': 0.1607581228017807,\n",
       "  'awful': 0.6816523671150208,\n",
       "  'and': 0.15758958458900452},\n",
       " {'mind-numbingly': 0.4513740837574005, 'awful': 0.5486258864402771},\n",
       " {'it': 0.0399131253361702,\n",
       "  \"'s\": 0.03184615075588226,\n",
       "  'not': 0.10886889696121216,\n",
       "  'as': 0.061326008290052414,\n",
       "  'awful': 0.09552591294050217,\n",
       "  'some': 0.021543949842453003,\n",
       "  'of': 0.050728920847177505,\n",
       "  'the': 0.03516437113285065,\n",
       "  'recent': 0.05645192041993141,\n",
       "  'hollywood': 0.023654330521821976,\n",
       "  'trip': 0.019051769748330116,\n",
       "  'tripe': 0.09272737056016922,\n",
       "  '...': 0.021492095664143562,\n",
       "  'but': 0.05844254419207573,\n",
       "  'far': 0.02596009150147438,\n",
       "  'from': 0.02301562950015068,\n",
       "  'a': 0.023342087864875793,\n",
       "  'groundbreaking': 0.02180878072977066,\n",
       "  'endeavor': 0.02501356229186058,\n",
       "  '.': 0.031037384644150734},\n",
       " {\"'s\": 0.10154023766517639,\n",
       "  'pauly': 0.242533341050148,\n",
       "  'shore': 0.25238409638404846,\n",
       "  'awful': 0.30458077788352966,\n",
       "  '.': 0.09896153956651688},\n",
       " {'thought': 0.10982026159763336,\n",
       "  'was': 0.17856962978839874,\n",
       "  'going': 0.10003925114870071,\n",
       "  'to': 0.16518551111221313,\n",
       "  'be': 0.07508866488933563,\n",
       "  'really': 0.06697555631399155,\n",
       "  'awful': 0.30432114005088806},\n",
       " {'comes': 0.0408744290471077,\n",
       "  'along': 0.025357255712151527,\n",
       "  'that': 0.042277272790670395,\n",
       "  'is': 0.02576359175145626,\n",
       "  'so': 0.03810666874051094,\n",
       "  'insanely': 0.03427128866314888,\n",
       "  'stupid': 0.12433664500713348,\n",
       "  ',': 0.08573081344366074,\n",
       "  'awful': 0.12796248495578766,\n",
       "  'in': 0.030213074758648872,\n",
       "  'many': 0.032964643090963364,\n",
       "  'ways': 0.034225933253765106,\n",
       "  'watching': 0.03279665857553482,\n",
       "  'it': 0.05346593260765076,\n",
       "  'leaves': 0.023364141583442688,\n",
       "  'you': 0.045350123196840286,\n",
       "  'giddy': 0.0844484344124794},\n",
       " {'although': 0.019331051036715508,\n",
       "  'it': 0.029590152204036713,\n",
       "  'starts': 0.032078590244054794,\n",
       "  'off': 0.04576343297958374,\n",
       "  'so': 0.02108973264694214,\n",
       "  'bad': 0.07307521253824234,\n",
       "  'that': 0.023397907614707947,\n",
       "  'you': 0.025098543614149094,\n",
       "  'feel': 0.014395937323570251,\n",
       "  'like': 0.022759640589356422,\n",
       "  'running': 0.040691301226615906,\n",
       "  'out': 0.018779227510094643,\n",
       "  'screaming': 0.06790227442979813,\n",
       "  ',': 0.047446805983781815,\n",
       "  'eventually': 0.016145139932632446,\n",
       "  'works': 0.053695980459451675,\n",
       "  'its': 0.014792228117585182,\n",
       "  'way': 0.016968553885817528,\n",
       "  'up': 0.015224169939756393,\n",
       "  'to': 0.03844080865383148,\n",
       "  'merely': 0.03432140871882439,\n",
       "  'rather': 0.04765291139483452,\n",
       "  'than': 0.017889687791466713,\n",
       "  'painfully': 0.06697443127632141,\n",
       "  'awful': 0.07081946730613708,\n",
       "  '.': 0.023009996861219406},\n",
       " {'it': 0.03890398517251015,\n",
       "  \"'s\": 0.031040968373417854,\n",
       "  'truly': 0.02195884846150875,\n",
       "  'awful': 0.09311069548130035,\n",
       "  'and': 0.021526040509343147,\n",
       "  'heartbreaking': 0.07325100153684616,\n",
       "  'subject': 0.060192208737134933,\n",
       "  'matter': 0.023226654157042503,\n",
       "  ',': 0.06238122284412384,\n",
       "  'but': 0.056964922696352005,\n",
       "  'one': 0.020249249413609505,\n",
       "  'whose': 0.023663057014346123,\n",
       "  'lessons': 0.06454918533563614,\n",
       "  'are': 0.019078701734542847,\n",
       "  'well': 0.044902071356773376,\n",
       "  'worth': 0.055071037262678146,\n",
       "  'revisiting': 0.07502652704715729,\n",
       "  'as': 0.059775471687316895,\n",
       "  'many': 0.023986412212252617,\n",
       "  'times': 0.018636098131537437,\n",
       "  'possible': 0.022477341815829277,\n",
       "  '.': 0.03025265596807003},\n",
       " {'completely': 0.20047158002853394,\n",
       "  'awful': 0.5199306011199951,\n",
       "  'iranian': 0.1562499850988388,\n",
       "  'drama': 0.12334784865379333},\n",
       " {'if': 0.03654563054442406,\n",
       "  'oscar': 0.044619448482990265,\n",
       "  'had': 0.015541545115411282,\n",
       "  'a': 0.013074034824967384,\n",
       "  'category': 0.015708889812231064,\n",
       "  'called': 0.035127829760313034,\n",
       "  'best': 0.03593941405415535,\n",
       "  'bad': 0.05520882457494736,\n",
       "  'film': 0.016536319628357887,\n",
       "  'you': 0.01896212063729763,\n",
       "  'thought': 0.01930818520486355,\n",
       "  'was': 0.03139543905854225,\n",
       "  'going': 0.017588524147868156,\n",
       "  'to': 0.02904229424893856,\n",
       "  'be': 0.013201804831624031,\n",
       "  'really': 0.01177539024502039,\n",
       "  'awful': 0.05350459739565849,\n",
       "  'but': 0.03273399546742439,\n",
       "  \"n't\": 0.05846481770277023,\n",
       "  ',': 0.03584638983011246,\n",
       "  'guys': 0.016738202422857285,\n",
       "  'would': 0.03610474243760109,\n",
       "  'probably': 0.028177771717309952,\n",
       "  'duking': 0.014753163792192936,\n",
       "  'it': 0.022355562075972557,\n",
       "  'out': 0.014187832362949848,\n",
       "  'with': 0.015833303332328796,\n",
       "  'the': 0.019695760682225227,\n",
       "  'queen': 0.04930589348077774,\n",
       "  'of': 0.028413550928235054,\n",
       "  'damned': 0.010090086609125137,\n",
       "  'for': 0.015158477239310741,\n",
       "  'honor': 0.03768705949187279,\n",
       "  '.': 0.017384212464094162},\n",
       " {'truly': 0.19083110988140106, 'awful': 0.8091689348220825},\n",
       " {'awful': 0.8388293385505676, 'acts': 0.16117069125175476},\n",
       " {'a': 0.019398203119635582,\n",
       "  'compelling': 0.04971982538700104,\n",
       "  ',': 0.053185995668172836,\n",
       "  'gut-clutching': 0.05695183575153351,\n",
       "  'piece': 0.0212491936981678,\n",
       "  'of': 0.042157746851444244,\n",
       "  'advocacy': 0.03909321501851082,\n",
       "  'cinema': 0.02731396071612835,\n",
       "  'that': 0.026228129863739014,\n",
       "  'carries': 0.044391632080078125,\n",
       "  'you': 0.028134474530816078,\n",
       "  'along': 0.01573122665286064,\n",
       "  'in': 0.018743697553873062,\n",
       "  'torrent': 0.04206709563732147,\n",
       "  'emotion': 0.024933859705924988,\n",
       "  'as': 0.050964340567588806,\n",
       "  'it': 0.03316938877105713,\n",
       "  'explores': 0.06554631888866425,\n",
       "  'the': 0.029222989454865456,\n",
       "  'awful': 0.07938583195209503,\n",
       "  'complications': 0.03923923522233963,\n",
       "  'one': 0.017264435067772865,\n",
       "  'terrifying': 0.016589267179369926,\n",
       "  'day': 0.029811151325702667,\n",
       "  '.': 0.025793300941586494},\n",
       " {'awful': 1.0}]"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display every stored entry for 'awful'. As the cell output shows, each entry is a\n",
    "# {token: weight} dict for one phrase that contains the word.\n",
    "word_sentens_attn_dic['awful']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2 | 0.7551351189613342 | ['a', 'beautifully'] [0.24486486613750458, 0.7551351189613342]\n",
      "24 | 0.07511750608682632 | ['beautifully', 'acted', 'and', 'directed', ',', 'it', \"'s\", 'clear', 'that', 'washington', 'most', 'certainly', 'has', 'a', 'new', 'career', 'ahead', 'of', 'him', 'if', 'he', 'so', 'chooses', '.'] [0.07511750608682632, 0.030896199867129326, 0.023045657202601433, 0.019230667501688004, 0.06678497791290283, 0.04165038466453552, 0.03323228657245636, 0.020300572738051414, 0.03293433040380478, 0.07515758275985718, 0.018535463139414787, 0.07497537136077881, 0.0301223024725914, 0.024358076974749565, 0.024615293368697166, 0.019183138385415077, 0.07763700187206268, 0.05293694883584976, 0.019979232922196388, 0.06808774173259735, 0.038470346480607986, 0.029685400426387787, 0.07067517191171646, 0.032388318330049515]\n",
      "10 | 0.21474631130695343 | ['should', 'have', 'a', 'stirring', 'time', 'at', 'this', 'beautifully', 'drawn', 'movie'] [0.11354087293148041, 0.06269878894090652, 0.06963498890399933, 0.2227986752986908, 0.07281559705734253, 0.06081385910511017, 0.05852064490318298, 0.21474631130695343, 0.0570930540561676, 0.0673372894525528]\n",
      "11 | 0.13601675629615784 | ['predictable', 'in', 'the', 'reassuring', 'manner', 'of', 'a', 'beautifully', 'sung', 'holiday', 'carol'] [0.17895862460136414, 0.04261750355362892, 0.06644424796104431, 0.12401089072227478, 0.09619412571191788, 0.09585398435592651, 0.0441056527197361, 0.13601675629615784, 0.12152737379074097, 0.04045074060559273, 0.053820036351680756]\n",
      "14 | 0.10898467898368835 | ['vibrantly', 'colored', 'and', 'beautifully', 'designed', ',', 'metropolis', 'is', 'a', 'feast', 'for', 'the', 'eyes', '.'] [0.11117106676101685, 0.03151916339993477, 0.03343592584133148, 0.10898467898368835, 0.06449607759714127, 0.09689537435770035, 0.12197745591402054, 0.029118737205863, 0.03534005954861641, 0.10853494703769684, 0.040974460542201996, 0.053239062428474426, 0.1173221692442894, 0.0469907782971859]\n",
      "2 | 0.7674763798713684 | ['beautifully', 'filmed'] [0.7674763798713684, 0.2325236052274704]\n",
      "3 | 0.4354516267776489 | ['reflective', 'and', 'beautifully'] [0.43095412850379944, 0.13359425961971283, 0.4354516267776489]\n",
      "4 | 0.4356611669063568 | ['daring', 'and', 'beautifully', 'made'] [0.31154629588127136, 0.13365855813026428, 0.4356611669063568, 0.11913397163152695]\n",
      "2 | 0.49846163392066956 | ['works', 'beautifully'] [0.5015383958816528, 0.49846163392066956]\n",
      "3 | 0.46065783500671387 | [',', 'beautifully', 'realized'] [0.4095586836338043, 0.46065783500671387, 0.1297835111618042]\n",
      "11 | 0.1552007496356964 | ['...', 'begins', 'on', 'a', 'high', 'note', 'and', 'sustains', 'it', 'beautifully', '.'] [0.04633772745728493, 0.12143895775079727, 0.06727851182222366, 0.05032637342810631, 0.0468563474714756, 0.1565658301115036, 0.047614771872758865, 0.15540897846221924, 0.08605411648750305, 0.1552007496356964, 0.06691770255565643]\n"
     ]
    }
   ],
   "source": [
    "# Print the first MAX_EXAMPLES stored entries for `word`, one row per entry:\n",
    "#   token count | weight of `word` | token list, weight list\n",
    "# Each entry is a {token: weight} dict, so a token repeated inside a phrase shows up\n",
    "# only once and the token count is the number of DISTINCT tokens.\n",
    "word = \"beautifully\"\n",
    "MAX_EXAMPLES = 11  # the previous `i > 10` counter loop also printed 11 rows\n",
    "\n",
    "for token_weights in word_sentens_attn_dic[word][:MAX_EXAMPLES]:\n",
    "    tokens = list(token_weights)            # keys, in insertion order\n",
    "    weights = list(token_weights.values())  # values, aligned with `tokens`\n",
    "    print(len(tokens), \"|\", token_weights[word], \"|\", tokens, weights)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
